Compare commits
38 Commits
mykaul-pat
...
next-5.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
42a930da3b | ||
|
|
660d68953d | ||
|
|
bff9b459ef | ||
|
|
4c77b86f26 | ||
|
|
fb0afb04ae | ||
|
|
bdb93af423 | ||
|
|
2fffaf36c4 | ||
|
|
dab150f3d8 | ||
|
|
7b6db3b69a | ||
|
|
b096c0d97d | ||
|
|
55e157be4d | ||
|
|
75593a6178 | ||
|
|
bbbc4aafef | ||
|
|
b74411f0ed | ||
|
|
995ffd6ee0 | ||
|
|
88e843c9db | ||
|
|
8cfeb6f509 | ||
|
|
7adc9aa50c | ||
|
|
15d4475870 | ||
|
|
bbe5c323a9 | ||
|
|
82f70a1c19 | ||
|
|
0668dc25df | ||
|
|
148655dc21 | ||
|
|
c89e5f06ba | ||
|
|
b38d169f55 | ||
|
|
5ac40ed1a8 | ||
|
|
37e6e65211 | ||
|
|
994645c03b | ||
|
|
51ed9a0ec0 | ||
|
|
fa689c811e | ||
|
|
18774b90a7 | ||
|
|
b20a85d651 | ||
|
|
1f0f3a4464 | ||
|
|
698ac3ac4e | ||
|
|
f975b7890e | ||
|
|
5da2489e0e | ||
|
|
d9961fc6a2 | ||
|
|
3c3621db07 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -26,6 +26,8 @@ tags
|
||||
testlog
|
||||
test/*/*.reject
|
||||
.vscode
|
||||
docs/_build
|
||||
docs/poetry.lock
|
||||
compile_commands.json
|
||||
.ccls-cache/
|
||||
.mypy_cache
|
||||
|
||||
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.27)
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
project(scylla)
|
||||
|
||||
@@ -8,19 +8,11 @@ list(APPEND CMAKE_MODULE_PATH
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/seastar/cmake)
|
||||
|
||||
set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE
|
||||
STRING "Choose the type of build." FORCE)
|
||||
# Set the possible values of build type for cmake-gui
|
||||
set(scylla_build_types
|
||||
"Debug" "Release" "Dev" "Sanitize" "Coverage")
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
|
||||
${scylla_build_types})
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE "Release" CACHE
|
||||
STRING "Choose the type of build." FORCE)
|
||||
message(WARNING "CMAKE_BUILD_TYPE not specified, Using 'Release'")
|
||||
elseif(NOT CMAKE_BUILD_TYPE IN_LIST scylla_build_types)
|
||||
message(FATAL_ERROR "Unknown CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}. "
|
||||
"Following types are supported: ${scylla_build_types}")
|
||||
endif()
|
||||
"Debug" "Release" "Dev" "Sanitize")
|
||||
string(TOUPPER "${CMAKE_BUILD_TYPE}" build_mode)
|
||||
include(mode.${build_mode})
|
||||
include(mode.common)
|
||||
@@ -34,9 +26,7 @@ set(CMAKE_CXX_EXTENSIONS ON CACHE INTERNAL "")
|
||||
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
|
||||
|
||||
set(Seastar_TESTING ON CACHE BOOL "" FORCE)
|
||||
set(Seastar_API_LEVEL 7 CACHE STRING "" FORCE)
|
||||
set(Seastar_APPS ON CACHE BOOL "" FORCE)
|
||||
set(Seastar_EXCLUDE_APPS_FROM_ALL ON CACHE BOOL "" FORCE)
|
||||
set(Seastar_API_LEVEL 6 CACHE STRING "" FORCE)
|
||||
add_subdirectory(seastar)
|
||||
|
||||
# System libraries dependencies
|
||||
@@ -56,8 +46,6 @@ find_package(xxHash REQUIRED)
|
||||
set(scylla_gen_build_dir "${CMAKE_BINARY_DIR}/gen")
|
||||
file(MAKE_DIRECTORY "${scylla_gen_build_dir}")
|
||||
|
||||
include(add_version_library)
|
||||
generate_scylla_version()
|
||||
|
||||
add_library(scylla-main STATIC)
|
||||
target_sources(scylla-main
|
||||
@@ -78,6 +66,7 @@ target_sources(scylla-main
|
||||
debug.cc
|
||||
init.cc
|
||||
keys.cc
|
||||
message/messaging_service.cc
|
||||
multishard_mutation_query.cc
|
||||
mutation_query.cc
|
||||
partition_slice_builder.cc
|
||||
@@ -123,10 +112,8 @@ add_subdirectory(index)
|
||||
add_subdirectory(interface)
|
||||
add_subdirectory(lang)
|
||||
add_subdirectory(locator)
|
||||
add_subdirectory(message)
|
||||
add_subdirectory(mutation)
|
||||
add_subdirectory(mutation_writer)
|
||||
add_subdirectory(node_ops)
|
||||
add_subdirectory(readers)
|
||||
add_subdirectory(redis)
|
||||
add_subdirectory(replica)
|
||||
@@ -144,6 +131,7 @@ add_subdirectory(tracing)
|
||||
add_subdirectory(transport)
|
||||
add_subdirectory(types)
|
||||
add_subdirectory(utils)
|
||||
include(add_version_library)
|
||||
add_version_library(scylla_version
|
||||
release.cc)
|
||||
|
||||
@@ -165,7 +153,6 @@ target_link_libraries(scylla PRIVATE
|
||||
index
|
||||
lang
|
||||
locator
|
||||
message
|
||||
mutation
|
||||
mutation_writer
|
||||
raft
|
||||
@@ -194,8 +181,35 @@ target_link_libraries(scylla PRIVATE
|
||||
seastar
|
||||
Boost::program_options)
|
||||
|
||||
# Force SHA1 build-id generation
|
||||
set(default_linker_flags "-Wl,--build-id=sha1")
|
||||
include(CheckLinkerFlag)
|
||||
set(Scylla_USE_LINKER
|
||||
""
|
||||
CACHE
|
||||
STRING
|
||||
"Use specified linker instead of the default one")
|
||||
if(Scylla_USE_LINKER)
|
||||
set(linkers "${Scylla_USE_LINKER}")
|
||||
else()
|
||||
set(linkers "lld" "gold")
|
||||
endif()
|
||||
|
||||
foreach(linker ${linkers})
|
||||
set(linker_flag "-fuse-ld=${linker}")
|
||||
check_linker_flag(CXX ${linker_flag} "CXX_LINKER_HAVE_${linker}")
|
||||
if(CXX_LINKER_HAVE_${linker})
|
||||
string(APPEND default_linker_flags " ${linker_flag}")
|
||||
break()
|
||||
elseif(Scylla_USE_LINKER)
|
||||
message(FATAL_ERROR "${Scylla_USE_LINKER} is not supported.")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${default_linker_flags}" CACHE INTERNAL "")
|
||||
|
||||
# TODO: patch dynamic linker to match configure.py behavior
|
||||
|
||||
target_include_directories(scylla PRIVATE
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
"${scylla_gen_build_dir}")
|
||||
|
||||
add_subdirectory(dist)
|
||||
|
||||
@@ -7,7 +7,6 @@ Options:
|
||||
-h|--help show this help message.
|
||||
-o|--output-dir PATH specify destination path at which the version files are to be created.
|
||||
-d|--date-stamp DATE manually set date for release parameter
|
||||
-v|--verbose also print out the version number
|
||||
|
||||
By default, the script will attempt to parse 'version' file
|
||||
in the current directory, which should contain a string of
|
||||
@@ -34,7 +33,6 @@ END
|
||||
)
|
||||
|
||||
DATE=""
|
||||
PRINT_VERSION=false
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
opt="$1"
|
||||
@@ -53,10 +51,6 @@ while [ $# -gt 0 ]; do
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
-v|--verbose)
|
||||
PRINT_VERSION=true
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
echo "Unexpected argument found: $1"
|
||||
echo
|
||||
@@ -78,7 +72,7 @@ fi
|
||||
|
||||
# Default scylla product/version tags
|
||||
PRODUCT=scylla
|
||||
VERSION=5.5.0-dev
|
||||
VERSION=5.3.0-rc1
|
||||
|
||||
if test -f version
|
||||
then
|
||||
@@ -108,9 +102,7 @@ if [ -f "$OUTPUT_DIR/SCYLLA-RELEASE-FILE" ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
if $PRINT_VERSION; then
|
||||
echo "$SCYLLA_VERSION-$SCYLLA_RELEASE"
|
||||
fi
|
||||
echo "$SCYLLA_VERSION-$SCYLLA_RELEASE"
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
echo "$SCYLLA_VERSION" > "$OUTPUT_DIR/SCYLLA-VERSION-FILE"
|
||||
echo "$SCYLLA_RELEASE" > "$OUTPUT_DIR/SCYLLA-RELEASE-FILE"
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include <seastar/json/json_elements.hh>
|
||||
#include <boost/algorithm/cxx11/any_of.hpp>
|
||||
#include "collection_mutation.hh"
|
||||
#include "db/query_context.hh"
|
||||
#include "schema/schema.hh"
|
||||
#include "db/tags/extension.hh"
|
||||
#include "db/tags/utils.hh"
|
||||
@@ -59,28 +60,7 @@ logging::logger elogger("alternator-executor");
|
||||
|
||||
namespace alternator {
|
||||
|
||||
enum class table_status {
|
||||
active = 0,
|
||||
creating,
|
||||
updating,
|
||||
deleting
|
||||
};
|
||||
|
||||
static sstring_view table_status_to_sstring(table_status tbl_status) {
|
||||
switch(tbl_status) {
|
||||
case table_status::active:
|
||||
return "ACTIVE";
|
||||
case table_status::creating:
|
||||
return "CREATING";
|
||||
case table_status::updating:
|
||||
return "UPDATING";
|
||||
case table_status::deleting:
|
||||
return "DELETING";
|
||||
}
|
||||
return "UKNOWN";
|
||||
}
|
||||
|
||||
static lw_shared_ptr<keyspace_metadata> create_keyspace_metadata(std::string_view keyspace_name, service::storage_proxy& sp, gms::gossiper& gossiper, api::timestamp_type);
|
||||
static future<std::vector<mutation>> create_keyspace(std::string_view keyspace_name, service::storage_proxy& sp, service::migration_manager& mm, gms::gossiper& gossiper, api::timestamp_type);
|
||||
|
||||
static map_type attrs_type() {
|
||||
static thread_local auto t = map_type_impl::get_instance(utf8_type, bytes_type, true);
|
||||
@@ -211,8 +191,9 @@ static std::string lsi_name(const std::string& table_name, std::string_view inde
|
||||
|
||||
/** Extract table name from a request.
|
||||
* Most requests expect the table's name to be listed in a "TableName" field.
|
||||
* This convenience function returns the name or api_error in case the
|
||||
* table name is missing or not a string.
|
||||
* This convenience function returns the name, with appropriate validation
|
||||
* and api_error in case the table name is missing or not a string, or
|
||||
* doesn't pass validate_table_name().
|
||||
*/
|
||||
static std::optional<std::string> find_table_name(const rjson::value& request) {
|
||||
const rjson::value* table_name_value = rjson::find(request, "TableName");
|
||||
@@ -223,6 +204,7 @@ static std::optional<std::string> find_table_name(const rjson::value& request) {
|
||||
throw api_error::validation("Non-string TableName field in request");
|
||||
}
|
||||
std::string table_name = table_name_value->GetString();
|
||||
validate_table_name(table_name);
|
||||
return table_name;
|
||||
}
|
||||
|
||||
@@ -249,10 +231,6 @@ schema_ptr executor::find_table(service::storage_proxy& proxy, const rjson::valu
|
||||
try {
|
||||
return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + sstring(*table_name), *table_name);
|
||||
} catch(data_dictionary::no_such_column_family&) {
|
||||
// DynamoDB returns validation error even when table does not exist
|
||||
// and the table name is invalid.
|
||||
validate_table_name(table_name.value());
|
||||
|
||||
throw api_error::resource_not_found(
|
||||
format("Requested resource not found: Table: {} not found", *table_name));
|
||||
}
|
||||
@@ -303,10 +281,6 @@ get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
|
||||
try {
|
||||
return { proxy.data_dictionary().find_schema(sstring(internal_ks_name), sstring(internal_table_name)), type };
|
||||
} catch (data_dictionary::no_such_column_family&) {
|
||||
// DynamoDB returns validation error even when table does not exist
|
||||
// and the table name is invalid.
|
||||
validate_table_name(table_name);
|
||||
|
||||
throw api_error::resource_not_found(
|
||||
format("Requested resource not found: Internal table: {}.{} not found", internal_ks_name, internal_table_name));
|
||||
}
|
||||
@@ -442,91 +416,6 @@ static rjson::value generate_arn_for_index(const schema& schema, std::string_vie
|
||||
schema.ks_name(), schema.cf_name(), index_name));
|
||||
}
|
||||
|
||||
static rjson::value fill_table_description(schema_ptr schema, table_status tbl_status, service::storage_proxy const& proxy)
|
||||
{
|
||||
rjson::value table_description = rjson::empty_object();
|
||||
rjson::add(table_description, "TableName", rjson::from_string(schema->cf_name()));
|
||||
// FIXME: take the tables creation time, not the current time!
|
||||
size_t creation_date_seconds = std::chrono::duration_cast<std::chrono::seconds>(gc_clock::now().time_since_epoch()).count();
|
||||
// FIXME: In DynamoDB the CreateTable implementation is asynchronous, and
|
||||
// the table may be in "Creating" state until creating is finished.
|
||||
// We don't currently do this in Alternator - instead CreateTable waits
|
||||
// until the table is really available. So/ DescribeTable returns either
|
||||
// ACTIVE or doesn't exist at all (and DescribeTable returns an error).
|
||||
// The states CREATING and UPDATING are not currently returned.
|
||||
rjson::add(table_description, "TableStatus", rjson::from_string(table_status_to_sstring(tbl_status)));
|
||||
rjson::add(table_description, "TableArn", generate_arn_for_table(*schema));
|
||||
rjson::add(table_description, "TableId", rjson::from_string(schema->id().to_sstring()));
|
||||
// FIXME: Instead of hardcoding, we should take into account which mode was chosen
|
||||
// when the table was created. But, Spark jobs expect something to be returned
|
||||
// and PAY_PER_REQUEST seems closer to reality than PROVISIONED.
|
||||
rjson::add(table_description, "BillingModeSummary", rjson::empty_object());
|
||||
rjson::add(table_description["BillingModeSummary"], "BillingMode", "PAY_PER_REQUEST");
|
||||
rjson::add(table_description["BillingModeSummary"], "LastUpdateToPayPerRequestDateTime", rjson::value(creation_date_seconds));
|
||||
// In PAY_PER_REQUEST billing mode, provisioned capacity should return 0
|
||||
rjson::add(table_description, "ProvisionedThroughput", rjson::empty_object());
|
||||
rjson::add(table_description["ProvisionedThroughput"], "ReadCapacityUnits", 0);
|
||||
rjson::add(table_description["ProvisionedThroughput"], "WriteCapacityUnits", 0);
|
||||
rjson::add(table_description["ProvisionedThroughput"], "NumberOfDecreasesToday", 0);
|
||||
|
||||
|
||||
|
||||
data_dictionary::table t = proxy.data_dictionary().find_column_family(schema);
|
||||
|
||||
if (tbl_status != table_status::deleting) {
|
||||
rjson::add(table_description, "CreationDateTime", rjson::value(creation_date_seconds));
|
||||
std::unordered_map<std::string,std::string> key_attribute_types;
|
||||
// Add base table's KeySchema and collect types for AttributeDefinitions:
|
||||
executor::describe_key_schema(table_description, *schema, key_attribute_types);
|
||||
if (!t.views().empty()) {
|
||||
rjson::value gsi_array = rjson::empty_array();
|
||||
rjson::value lsi_array = rjson::empty_array();
|
||||
for (const view_ptr& vptr : t.views()) {
|
||||
rjson::value view_entry = rjson::empty_object();
|
||||
const sstring& cf_name = vptr->cf_name();
|
||||
size_t delim_it = cf_name.find(':');
|
||||
if (delim_it == sstring::npos) {
|
||||
elogger.error("Invalid internal index table name: {}", cf_name);
|
||||
continue;
|
||||
}
|
||||
sstring index_name = cf_name.substr(delim_it + 1);
|
||||
rjson::add(view_entry, "IndexName", rjson::from_string(index_name));
|
||||
rjson::add(view_entry, "IndexArn", generate_arn_for_index(*schema, index_name));
|
||||
// Add indexes's KeySchema and collect types for AttributeDefinitions:
|
||||
executor::describe_key_schema(view_entry, *vptr, key_attribute_types);
|
||||
// Add projection type
|
||||
rjson::value projection = rjson::empty_object();
|
||||
rjson::add(projection, "ProjectionType", "ALL");
|
||||
// FIXME: we have to get ProjectionType from the schema when it is added
|
||||
rjson::add(view_entry, "Projection", std::move(projection));
|
||||
// Local secondary indexes are marked by an extra '!' sign occurring before the ':' delimiter
|
||||
rjson::value& index_array = (delim_it > 1 && cf_name[delim_it-1] == '!') ? lsi_array : gsi_array;
|
||||
rjson::push_back(index_array, std::move(view_entry));
|
||||
}
|
||||
if (!lsi_array.Empty()) {
|
||||
rjson::add(table_description, "LocalSecondaryIndexes", std::move(lsi_array));
|
||||
}
|
||||
if (!gsi_array.Empty()) {
|
||||
rjson::add(table_description, "GlobalSecondaryIndexes", std::move(gsi_array));
|
||||
}
|
||||
}
|
||||
// Use map built by describe_key_schema() for base and indexes to produce
|
||||
// AttributeDefinitions for all key columns:
|
||||
rjson::value attribute_definitions = rjson::empty_array();
|
||||
for (auto& type : key_attribute_types) {
|
||||
rjson::value key = rjson::empty_object();
|
||||
rjson::add(key, "AttributeName", rjson::from_string(type.first));
|
||||
rjson::add(key, "AttributeType", rjson::from_string(type.second));
|
||||
rjson::push_back(attribute_definitions, std::move(key));
|
||||
}
|
||||
rjson::add(table_description, "AttributeDefinitions", std::move(attribute_definitions));
|
||||
}
|
||||
executor::supplement_table_stream_info(table_description, *schema, proxy);
|
||||
|
||||
// FIXME: still missing some response fields (issue #5026)
|
||||
return table_description;
|
||||
}
|
||||
|
||||
bool is_alternator_keyspace(const sstring& ks_name) {
|
||||
return ks_name.find(executor::KEYSPACE_NAME_PREFIX) == 0;
|
||||
}
|
||||
@@ -543,7 +432,85 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
|
||||
|
||||
tracing::add_table_name(trace_state, schema->ks_name(), schema->cf_name());
|
||||
|
||||
rjson::value table_description = fill_table_description(schema, table_status::active, _proxy);
|
||||
rjson::value table_description = rjson::empty_object();
|
||||
rjson::add(table_description, "TableName", rjson::from_string(schema->cf_name()));
|
||||
// FIXME: take the tables creation time, not the current time!
|
||||
size_t creation_date_seconds = std::chrono::duration_cast<std::chrono::seconds>(gc_clock::now().time_since_epoch()).count();
|
||||
rjson::add(table_description, "CreationDateTime", rjson::value(creation_date_seconds));
|
||||
// FIXME: In DynamoDB the CreateTable implementation is asynchronous, and
|
||||
// the table may be in "Creating" state until creating is finished.
|
||||
// We don't currently do this in Alternator - instead CreateTable waits
|
||||
// until the table is really available. So/ DescribeTable returns either
|
||||
// ACTIVE or doesn't exist at all (and DescribeTable returns an error).
|
||||
// The other states (CREATING, UPDATING, DELETING) are not currently
|
||||
// returned.
|
||||
rjson::add(table_description, "TableStatus", "ACTIVE");
|
||||
rjson::add(table_description, "TableArn", generate_arn_for_table(*schema));
|
||||
rjson::add(table_description, "TableId", rjson::from_string(schema->id().to_sstring()));
|
||||
// FIXME: Instead of hardcoding, we should take into account which mode was chosen
|
||||
// when the table was created. But, Spark jobs expect something to be returned
|
||||
// and PAY_PER_REQUEST seems closer to reality than PROVISIONED.
|
||||
rjson::add(table_description, "BillingModeSummary", rjson::empty_object());
|
||||
rjson::add(table_description["BillingModeSummary"], "BillingMode", "PAY_PER_REQUEST");
|
||||
rjson::add(table_description["BillingModeSummary"], "LastUpdateToPayPerRequestDateTime", rjson::value(creation_date_seconds));
|
||||
// In PAY_PER_REQUEST billing mode, provisioned capacity should return 0
|
||||
rjson::add(table_description, "ProvisionedThroughput", rjson::empty_object());
|
||||
rjson::add(table_description["ProvisionedThroughput"], "ReadCapacityUnits", 0);
|
||||
rjson::add(table_description["ProvisionedThroughput"], "WriteCapacityUnits", 0);
|
||||
rjson::add(table_description["ProvisionedThroughput"], "NumberOfDecreasesToday", 0);
|
||||
|
||||
std::unordered_map<std::string,std::string> key_attribute_types;
|
||||
// Add base table's KeySchema and collect types for AttributeDefinitions:
|
||||
describe_key_schema(table_description, *schema, key_attribute_types);
|
||||
|
||||
data_dictionary::table t = _proxy.data_dictionary().find_column_family(schema);
|
||||
if (!t.views().empty()) {
|
||||
rjson::value gsi_array = rjson::empty_array();
|
||||
rjson::value lsi_array = rjson::empty_array();
|
||||
for (const view_ptr& vptr : t.views()) {
|
||||
rjson::value view_entry = rjson::empty_object();
|
||||
const sstring& cf_name = vptr->cf_name();
|
||||
size_t delim_it = cf_name.find(':');
|
||||
if (delim_it == sstring::npos) {
|
||||
elogger.error("Invalid internal index table name: {}", cf_name);
|
||||
continue;
|
||||
}
|
||||
sstring index_name = cf_name.substr(delim_it + 1);
|
||||
rjson::add(view_entry, "IndexName", rjson::from_string(index_name));
|
||||
rjson::add(view_entry, "IndexArn", generate_arn_for_index(*schema, index_name));
|
||||
// Add indexes's KeySchema and collect types for AttributeDefinitions:
|
||||
describe_key_schema(view_entry, *vptr, key_attribute_types);
|
||||
// Add projection type
|
||||
rjson::value projection = rjson::empty_object();
|
||||
rjson::add(projection, "ProjectionType", "ALL");
|
||||
// FIXME: we have to get ProjectionType from the schema when it is added
|
||||
rjson::add(view_entry, "Projection", std::move(projection));
|
||||
// Local secondary indexes are marked by an extra '!' sign occurring before the ':' delimiter
|
||||
rjson::value& index_array = (delim_it > 1 && cf_name[delim_it-1] == '!') ? lsi_array : gsi_array;
|
||||
rjson::push_back(index_array, std::move(view_entry));
|
||||
}
|
||||
if (!lsi_array.Empty()) {
|
||||
rjson::add(table_description, "LocalSecondaryIndexes", std::move(lsi_array));
|
||||
}
|
||||
if (!gsi_array.Empty()) {
|
||||
rjson::add(table_description, "GlobalSecondaryIndexes", std::move(gsi_array));
|
||||
}
|
||||
}
|
||||
// Use map built by describe_key_schema() for base and indexes to produce
|
||||
// AttributeDefinitions for all key columns:
|
||||
rjson::value attribute_definitions = rjson::empty_array();
|
||||
for (auto& type : key_attribute_types) {
|
||||
rjson::value key = rjson::empty_object();
|
||||
rjson::add(key, "AttributeName", rjson::from_string(type.first));
|
||||
rjson::add(key, "AttributeType", rjson::from_string(type.second));
|
||||
rjson::push_back(attribute_definitions, std::move(key));
|
||||
}
|
||||
rjson::add(table_description, "AttributeDefinitions", std::move(attribute_definitions));
|
||||
|
||||
supplement_table_stream_info(table_description, *schema, _proxy);
|
||||
|
||||
// FIXME: still missing some response fields (issue #5026)
|
||||
|
||||
rjson::value response = rjson::empty_object();
|
||||
rjson::add(response, "Table", std::move(table_description));
|
||||
elogger.trace("returning {}", response);
|
||||
@@ -555,17 +522,10 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
|
||||
elogger.trace("Deleting table {}", request);
|
||||
|
||||
std::string table_name = get_table_name(request);
|
||||
// DynamoDB returns validation error even when table does not exist
|
||||
// and the table name is invalid.
|
||||
validate_table_name(table_name);
|
||||
|
||||
std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
|
||||
tracing::add_table_name(trace_state, keyspace_name, table_name);
|
||||
auto& p = _proxy.container();
|
||||
|
||||
schema_ptr schema = get_table(_proxy, request);
|
||||
rjson::value table_description = fill_table_description(schema, table_status::deleting, _proxy);
|
||||
|
||||
co_await _mm.container().invoke_on(0, [&] (service::migration_manager& mm) -> future<> {
|
||||
// FIXME: the following needs to be in a loop. If mm.announce() below
|
||||
// fails, we need to retry the whole thing.
|
||||
@@ -575,14 +535,18 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
|
||||
throw api_error::resource_not_found(format("Requested resource not found: Table: {} not found", table_name));
|
||||
}
|
||||
|
||||
auto m = co_await service::prepare_column_family_drop_announcement(_proxy, keyspace_name, table_name, group0_guard.write_timestamp(), service::drop_views::yes);
|
||||
auto m2 = co_await service::prepare_keyspace_drop_announcement(_proxy.local_db(), keyspace_name, group0_guard.write_timestamp());
|
||||
auto m = co_await mm.prepare_column_family_drop_announcement(keyspace_name, table_name, group0_guard.write_timestamp(), service::migration_manager::drop_views::yes);
|
||||
auto m2 = co_await mm.prepare_keyspace_drop_announcement(keyspace_name, group0_guard.write_timestamp());
|
||||
|
||||
std::move(m2.begin(), m2.end(), std::back_inserter(m));
|
||||
|
||||
co_await mm.announce(std::move(m), std::move(group0_guard), format("alternator-executor: delete {} table", table_name));
|
||||
co_await mm.announce(std::move(m), std::move(group0_guard));
|
||||
});
|
||||
|
||||
// FIXME: need more attributes?
|
||||
rjson::value table_description = rjson::empty_object();
|
||||
rjson::add(table_description, "TableName", rjson::from_string(table_name));
|
||||
rjson::add(table_description, "TableStatus", "DELETING");
|
||||
rjson::value response = rjson::empty_object();
|
||||
rjson::add(response, "TableDescription", std::move(table_description));
|
||||
elogger.trace("returning {}", response);
|
||||
@@ -867,6 +831,17 @@ future<executor::request_return_type> executor::list_tags_of_resource(client_sta
|
||||
return make_ready_future<executor::request_return_type>(make_jsonable(std::move(ret)));
|
||||
}
|
||||
|
||||
static future<> wait_for_schema_agreement(service::migration_manager& mm, db::timeout_clock::time_point deadline) {
|
||||
return do_until([&mm, deadline] {
|
||||
if (db::timeout_clock::now() > deadline) {
|
||||
throw std::runtime_error("Unable to reach schema agreement");
|
||||
}
|
||||
return mm.have_schema_agreement();
|
||||
}, [] {
|
||||
return seastar::sleep(500ms);
|
||||
});
|
||||
}
|
||||
|
||||
static void verify_billing_mode(const rjson::value& request) {
|
||||
// Alternator does not yet support billing or throughput limitations, but
|
||||
// let's verify that BillingMode is at least legal.
|
||||
@@ -884,38 +859,6 @@ static void verify_billing_mode(const rjson::value& request) {
|
||||
}
|
||||
}
|
||||
|
||||
// Validate that a AttributeDefinitions parameter in CreateTable is valid, and
|
||||
// throws user-facing api_error::validation if it's not.
|
||||
// In particular, verify that the same AttributeName doesn't appear more than
|
||||
// once (Issue #13870).
|
||||
static void validate_attribute_definitions(const rjson::value& attribute_definitions){
|
||||
if (!attribute_definitions.IsArray()) {
|
||||
throw api_error::validation("AttributeDefinitions must be an array");
|
||||
}
|
||||
std::unordered_set<std::string> seen_attribute_names;
|
||||
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
|
||||
const rjson::value* attribute_name = rjson::find(*it, "AttributeName");
|
||||
if (!attribute_name) {
|
||||
throw api_error::validation("AttributeName missing in AttributeDefinitions");
|
||||
}
|
||||
if (!attribute_name->IsString()) {
|
||||
throw api_error::validation("AttributeName in AttributeDefinitions must be a string");
|
||||
}
|
||||
auto [it2, added] = seen_attribute_names.emplace(rjson::to_string_view(*attribute_name));
|
||||
if (!added) {
|
||||
throw api_error::validation(format("Duplicate AttributeName={} in AttributeDefinitions",
|
||||
rjson::to_string_view(*attribute_name)));
|
||||
}
|
||||
const rjson::value* attribute_type = rjson::find(*it, "AttributeType");
|
||||
if (!attribute_type) {
|
||||
throw api_error::validation("AttributeType missing in AttributeDefinitions");
|
||||
}
|
||||
if (!attribute_type->IsString()) {
|
||||
throw api_error::validation("AttributeType in AttributeDefinitions must be a string");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static future<executor::request_return_type> create_table_on_shard0(tracing::trace_state_ptr trace_state, rjson::value request, service::storage_proxy& sp, service::migration_manager& mm, gms::gossiper& gossiper) {
|
||||
assert(this_shard_id() == 0);
|
||||
|
||||
@@ -924,14 +867,11 @@ static future<executor::request_return_type> create_table_on_shard0(tracing::tra
|
||||
// (e.g., verify that this table doesn't already exist) - we can only
|
||||
// do this further down - after taking group0_guard.
|
||||
std::string table_name = get_table_name(request);
|
||||
validate_table_name(table_name);
|
||||
|
||||
if (table_name.find(executor::INTERNAL_TABLE_PREFIX) == 0) {
|
||||
co_return api_error::validation(format("Prefix {} is reserved for accessing internal tables", executor::INTERNAL_TABLE_PREFIX));
|
||||
}
|
||||
std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
|
||||
const rjson::value& attribute_definitions = request["AttributeDefinitions"];
|
||||
validate_attribute_definitions(attribute_definitions);
|
||||
|
||||
tracing::add_table_name(trace_state, keyspace_name, table_name);
|
||||
|
||||
@@ -1121,9 +1061,8 @@ static future<executor::request_return_type> create_table_on_shard0(tracing::tra
|
||||
auto group0_guard = co_await mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
std::vector<mutation> schema_mutations;
|
||||
auto ksm = create_keyspace_metadata(keyspace_name, sp, gossiper, ts);
|
||||
try {
|
||||
schema_mutations = service::prepare_new_keyspace_announcement(sp.local_db(), ksm, ts);
|
||||
schema_mutations = co_await create_keyspace(keyspace_name, sp, mm, gossiper, ts);
|
||||
} catch (exceptions::already_exists_exception&) {
|
||||
if (sp.data_dictionary().has_schema(keyspace_name, table_name)) {
|
||||
co_return api_error::resource_in_use(format("Table {} already exists", table_name));
|
||||
@@ -1133,14 +1072,22 @@ static future<executor::request_return_type> create_table_on_shard0(tracing::tra
|
||||
// This should never happen, the ID is supposed to be unique
|
||||
co_return api_error::internal(format("Table with ID {} already exists", schema->id()));
|
||||
}
|
||||
co_await service::prepare_new_column_family_announcement(schema_mutations, sp, *ksm, schema, ts);
|
||||
db::schema_tables::add_table_or_view_to_schema_mutation(schema, ts, true, schema_mutations);
|
||||
// we must call before_create_column_family callbacks - which allow
|
||||
// listeners to modify our schema_mutations. For example, CDC may add
|
||||
// another table (the CDC log table) to the same keyspace.
|
||||
// Unfortunately the convention is that this callback must be run in
|
||||
// a Seastar thread.
|
||||
co_await seastar::async([&] {
|
||||
mm.get_notifier().before_create_column_family(*schema, schema_mutations, ts);
|
||||
});
|
||||
for (schema_builder& view_builder : view_builders) {
|
||||
db::schema_tables::add_table_or_view_to_schema_mutation(
|
||||
view_ptr(view_builder.build()), ts, true, schema_mutations);
|
||||
}
|
||||
co_await mm.announce(std::move(schema_mutations), std::move(group0_guard), format("alternator-executor: create {} table", table_name));
|
||||
co_await mm.announce(std::move(schema_mutations), std::move(group0_guard));
|
||||
|
||||
co_await mm.wait_for_schema_agreement(sp.local_db(), db::timeout_clock::now() + 10s, nullptr);
|
||||
co_await wait_for_schema_agreement(mm, db::timeout_clock::now() + 10s);
|
||||
rjson::value status = rjson::empty_object();
|
||||
executor::supplement_table_info(request, *schema, sp);
|
||||
rjson::add(status, "TableDescription", std::move(request));
|
||||
@@ -1203,11 +1150,11 @@ future<executor::request_return_type> executor::update_table(client_state& clien
|
||||
|
||||
auto schema = builder.build();
|
||||
|
||||
auto m = co_await service::prepare_column_family_update_announcement(p.local(), schema, false, std::vector<view_ptr>(), group0_guard.write_timestamp());
|
||||
auto m = co_await mm.prepare_column_family_update_announcement(schema, false, std::vector<view_ptr>(), group0_guard.write_timestamp());
|
||||
|
||||
co_await mm.announce(std::move(m), std::move(group0_guard), format("alternator-executor: update {} table", tab->cf_name()));
|
||||
co_await mm.announce(std::move(m), std::move(group0_guard));
|
||||
|
||||
co_await mm.wait_for_schema_agreement(p.local().local_db(), db::timeout_clock::now() + 10s, nullptr);
|
||||
co_await wait_for_schema_agreement(mm, db::timeout_clock::now() + 10s);
|
||||
|
||||
rjson::value status = rjson::empty_object();
|
||||
supplement_table_info(request, *schema, p.local());
|
||||
@@ -1643,7 +1590,7 @@ static parsed::condition_expression get_parsed_condition_expression(rjson::value
|
||||
throw api_error::validation("ConditionExpression must not be empty");
|
||||
}
|
||||
try {
|
||||
return parse_condition_expression(rjson::to_string_view(*condition_expression), "ConditionExpression");
|
||||
return parse_condition_expression(rjson::to_string_view(*condition_expression));
|
||||
} catch(expressions_syntax_error& e) {
|
||||
throw api_error::validation(e.what());
|
||||
}
|
||||
@@ -1658,16 +1605,17 @@ static bool check_needs_read_before_write(const parsed::condition_expression& co
|
||||
|
||||
// Fail the expression if it has unused attribute names or values. This is
|
||||
// how DynamoDB behaves, so we do too.
|
||||
static void verify_all_are_used(const rjson::value* field,
|
||||
const std::unordered_set<std::string>& used, const char* field_name, const char* operation) {
|
||||
if (!field) {
|
||||
static void verify_all_are_used(const rjson::value& req, const char* field,
|
||||
const std::unordered_set<std::string>& used, const char* operation) {
|
||||
const rjson::value* attribute_names = rjson::find(req, field);
|
||||
if (!attribute_names) {
|
||||
return;
|
||||
}
|
||||
for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
|
||||
for (auto it = attribute_names->MemberBegin(); it != attribute_names->MemberEnd(); ++it) {
|
||||
if (!used.contains(it->name.GetString())) {
|
||||
throw api_error::validation(
|
||||
format("{} has spurious '{}', not used in {}",
|
||||
field_name, it->name.GetString(), operation));
|
||||
field, it->name.GetString(), operation));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1694,8 +1642,8 @@ public:
|
||||
resolve_condition_expression(_condition_expression,
|
||||
expression_attribute_names, expression_attribute_values,
|
||||
used_attribute_names, used_attribute_values);
|
||||
verify_all_are_used(expression_attribute_names, used_attribute_names,"ExpressionAttributeNames", "PutItem");
|
||||
verify_all_are_used(expression_attribute_values, used_attribute_values,"ExpressionAttributeValues", "PutItem");
|
||||
verify_all_are_used(_request, "ExpressionAttributeNames", used_attribute_names, "PutItem");
|
||||
verify_all_are_used(_request, "ExpressionAttributeValues", used_attribute_values, "PutItem");
|
||||
} else {
|
||||
if (expression_attribute_names) {
|
||||
throw api_error::validation("ExpressionAttributeNames cannot be used without ConditionExpression");
|
||||
@@ -1779,8 +1727,8 @@ public:
|
||||
resolve_condition_expression(_condition_expression,
|
||||
expression_attribute_names, expression_attribute_values,
|
||||
used_attribute_names, used_attribute_values);
|
||||
verify_all_are_used(expression_attribute_names, used_attribute_names,"ExpressionAttributeNames", "DeleteItem");
|
||||
verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "DeleteItem");
|
||||
verify_all_are_used(_request, "ExpressionAttributeNames", used_attribute_names, "DeleteItem");
|
||||
verify_all_are_used(_request, "ExpressionAttributeValues", used_attribute_values, "DeleteItem");
|
||||
} else {
|
||||
if (expression_attribute_names) {
|
||||
throw api_error::validation("ExpressionAttributeNames cannot be used without ConditionExpression");
|
||||
@@ -2553,8 +2501,8 @@ update_item_operation::update_item_operation(service::storage_proxy& proxy, rjso
|
||||
expression_attribute_names, expression_attribute_values,
|
||||
used_attribute_names, used_attribute_values);
|
||||
|
||||
verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "UpdateItem");
|
||||
verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "UpdateItem");
|
||||
verify_all_are_used(_request, "ExpressionAttributeNames", used_attribute_names, "UpdateItem");
|
||||
verify_all_are_used(_request, "ExpressionAttributeValues", used_attribute_values, "UpdateItem");
|
||||
|
||||
// DynamoDB forbids having both old-style AttributeUpdates or Expected
|
||||
// and new-style UpdateExpression or ConditionExpression in the same request
|
||||
@@ -3163,8 +3111,7 @@ future<executor::request_return_type> executor::get_item(client_state& client_st
|
||||
|
||||
std::unordered_set<std::string> used_attribute_names;
|
||||
auto attrs_to_get = calculate_attrs_to_get(request, used_attribute_names);
|
||||
const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
|
||||
verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "GetItem");
|
||||
verify_all_are_used(request, "ExpressionAttributeNames", used_attribute_names, "GetItem");
|
||||
|
||||
return _proxy.query(schema, std::move(command), std::move(partition_ranges), cl,
|
||||
service::storage_proxy::coordinator_query_options(executor::default_timeout(), std::move(permit), client_state, trace_state)).then(
|
||||
@@ -3275,8 +3222,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
|
||||
rs.cl = get_read_consistency(it->value);
|
||||
std::unordered_set<std::string> used_attribute_names;
|
||||
rs.attrs_to_get = ::make_shared<const std::optional<attrs_to_get>>(calculate_attrs_to_get(it->value, used_attribute_names));
|
||||
const rjson::value* expression_attribute_names = rjson::find(it->value, "ExpressionAttributeNames");
|
||||
verify_all_are_used(expression_attribute_names, used_attribute_names,"ExpressionAttributeNames", "GetItem");
|
||||
verify_all_are_used(request, "ExpressionAttributeNames", used_attribute_names, "GetItem");
|
||||
auto& keys = (it->value)["Keys"];
|
||||
for (rjson::value& key : keys.GetArray()) {
|
||||
rs.add(key);
|
||||
@@ -3445,7 +3391,7 @@ filter::filter(const rjson::value& request, request_type rt,
|
||||
throw api_error::validation("Cannot use both old-style and new-style parameters in same request: FilterExpression and AttributesToGet");
|
||||
}
|
||||
try {
|
||||
auto parsed = parse_condition_expression(rjson::to_string_view(*expression), "FilterExpression");
|
||||
auto parsed = parse_condition_expression(rjson::to_string_view(*expression));
|
||||
const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
|
||||
const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
|
||||
resolve_condition_expression(parsed,
|
||||
@@ -3849,10 +3795,8 @@ future<executor::request_return_type> executor::scan(client_state& client_state,
|
||||
// optimized the filtering by modifying partition_ranges and/or
|
||||
// ck_bounds. We haven't done this optimization yet.
|
||||
|
||||
const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
|
||||
const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
|
||||
verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Scan");
|
||||
verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Scan");
|
||||
verify_all_are_used(request, "ExpressionAttributeNames", used_attribute_names, "Scan");
|
||||
verify_all_are_used(request, "ExpressionAttributeValues", used_attribute_values, "Scan");
|
||||
|
||||
return do_query(_proxy, schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
|
||||
std::move(filter), query::partition_slice::option_set(), client_state, _stats.cql_stats, trace_state, std::move(permit));
|
||||
@@ -4073,7 +4017,7 @@ calculate_bounds_condition_expression(schema_ptr schema,
|
||||
// sort-key range.
|
||||
parsed::condition_expression p;
|
||||
try {
|
||||
p = parse_condition_expression(rjson::to_string_view(expression), "KeyConditionExpression");
|
||||
p = parse_condition_expression(rjson::to_string_view(expression));
|
||||
} catch(expressions_syntax_error& e) {
|
||||
throw api_error::validation(e.what());
|
||||
}
|
||||
@@ -4293,17 +4237,13 @@ future<executor::request_return_type> executor::query(client_state& client_state
|
||||
throw api_error::validation("Query must have one of "
|
||||
"KeyConditions or KeyConditionExpression");
|
||||
}
|
||||
|
||||
const rjson::value* expression_attribute_names = rjson::find(request, "ExpressionAttributeNames");
|
||||
const rjson::value* expression_attribute_values = rjson::find(request, "ExpressionAttributeValues");
|
||||
|
||||
// exactly one of key_conditions or key_condition_expression
|
||||
auto [partition_ranges, ck_bounds] = key_conditions
|
||||
? calculate_bounds_conditions(schema, *key_conditions)
|
||||
: calculate_bounds_condition_expression(schema, *key_condition_expression,
|
||||
expression_attribute_values,
|
||||
rjson::find(request, "ExpressionAttributeValues"),
|
||||
used_attribute_values,
|
||||
expression_attribute_names,
|
||||
rjson::find(request, "ExpressionAttributeNames"),
|
||||
used_attribute_names);
|
||||
|
||||
filter filter(request, filter::request_type::QUERY,
|
||||
@@ -4330,8 +4270,8 @@ future<executor::request_return_type> executor::query(client_state& client_state
|
||||
select_type select = parse_select(request, table_type);
|
||||
|
||||
auto attrs_to_get = calculate_attrs_to_get(request, used_attribute_names, select);
|
||||
verify_all_are_used(expression_attribute_names, used_attribute_names, "ExpressionAttributeNames", "Query");
|
||||
verify_all_are_used(expression_attribute_values, used_attribute_values, "ExpressionAttributeValues", "Query");
|
||||
verify_all_are_used(request, "ExpressionAttributeValues", used_attribute_values, "Query");
|
||||
verify_all_are_used(request, "ExpressionAttributeNames", used_attribute_names, "Query");
|
||||
query::partition_slice::option_set opts;
|
||||
opts.set_if<query::partition_slice::option::reversed>(!forward);
|
||||
return do_query(_proxy, schema, exclusive_start_key, std::move(partition_ranges), std::move(ck_bounds), std::move(attrs_to_get), limit, cl,
|
||||
@@ -4392,17 +4332,6 @@ future<executor::request_return_type> executor::list_tables(client_state& client
|
||||
|
||||
future<executor::request_return_type> executor::describe_endpoints(client_state& client_state, service_permit permit, rjson::value request, std::string host_header) {
|
||||
_stats.api_operations.describe_endpoints++;
|
||||
// The alternator_describe_endpoints configuration can be used to disable
|
||||
// the DescribeEndpoints operation, or set it to return a fixed string
|
||||
std::string override = _proxy.data_dictionary().get_config().alternator_describe_endpoints();
|
||||
if (!override.empty()) {
|
||||
if (override == "disabled") {
|
||||
_stats.unsupported_operations++;
|
||||
return make_ready_future<request_return_type>(api_error::unknown_operation(
|
||||
"DescribeEndpoints disabled by configuration (alternator_describe_endpoints=disabled)"));
|
||||
}
|
||||
host_header = std::move(override);
|
||||
}
|
||||
rjson::value response = rjson::empty_object();
|
||||
// Without having any configuration parameter to say otherwise, we tell
|
||||
// the user to return to the same endpoint they used to reach us. The only
|
||||
@@ -4440,10 +4369,6 @@ future<executor::request_return_type> executor::describe_continuous_backups(clie
|
||||
try {
|
||||
schema = _proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
|
||||
} catch(data_dictionary::no_such_column_family&) {
|
||||
// DynamoDB returns validation error even when table does not exist
|
||||
// and the table name is invalid.
|
||||
validate_table_name(table_name);
|
||||
|
||||
throw api_error::table_not_found(
|
||||
format("Table {} not found", table_name));
|
||||
}
|
||||
@@ -4457,23 +4382,25 @@ future<executor::request_return_type> executor::describe_continuous_backups(clie
|
||||
co_return make_jsonable(std::move(response));
|
||||
}
|
||||
|
||||
// Create the metadata for the keyspace in which we put the alternator
|
||||
// table if it doesn't already exist.
|
||||
// Create the keyspace in which we put the alternator table, if it doesn't
|
||||
// already exist.
|
||||
// Currently, we automatically configure the keyspace based on the number
|
||||
// of nodes in the cluster: A cluster with 3 or more live nodes, gets RF=3.
|
||||
// A smaller cluster (presumably, a test only), gets RF=1. The user may
|
||||
// manually create the keyspace to override this predefined behavior.
|
||||
static lw_shared_ptr<keyspace_metadata> create_keyspace_metadata(std::string_view keyspace_name, service::storage_proxy& sp, gms::gossiper& gossiper, api::timestamp_type ts) {
|
||||
int endpoint_count = gossiper.num_endpoints();
|
||||
static future<std::vector<mutation>> create_keyspace(std::string_view keyspace_name, service::storage_proxy& sp, service::migration_manager& mm, gms::gossiper& gossiper, api::timestamp_type ts) {
|
||||
sstring keyspace_name_str(keyspace_name);
|
||||
int endpoint_count = gossiper.get_endpoint_states().size();
|
||||
int rf = 3;
|
||||
if (endpoint_count < rf) {
|
||||
rf = 1;
|
||||
elogger.warn("Creating keyspace '{}' for Alternator with unsafe RF={} because cluster only has {} nodes.",
|
||||
keyspace_name, rf, endpoint_count);
|
||||
keyspace_name_str, rf, endpoint_count);
|
||||
}
|
||||
auto opts = get_network_topology_options(sp, gossiper, rf);
|
||||
auto ksm = keyspace_metadata::new_keyspace(keyspace_name_str, "org.apache.cassandra.locator.NetworkTopologyStrategy", std::move(opts), true);
|
||||
|
||||
return keyspace_metadata::new_keyspace(keyspace_name, "org.apache.cassandra.locator.NetworkTopologyStrategy", std::move(opts), true);
|
||||
co_return mm.prepare_new_keyspace_announcement(ksm, ts);
|
||||
}
|
||||
|
||||
future<> executor::start() {
|
||||
|
||||
@@ -225,10 +225,9 @@ private:
|
||||
friend class rmw_operation;
|
||||
|
||||
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr);
|
||||
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&);
|
||||
|
||||
public:
|
||||
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&);
|
||||
|
||||
static std::optional<rjson::value> describe_single_item(schema_ptr,
|
||||
const query::partition_slice&,
|
||||
const cql3::selection::selection&,
|
||||
@@ -249,7 +248,7 @@ public:
|
||||
|
||||
static void add_stream_options(const rjson::value& stream_spec, schema_builder&, service::storage_proxy& sp);
|
||||
static void supplement_table_info(rjson::value& descr, const schema& schema, service::storage_proxy& sp);
|
||||
static void supplement_table_stream_info(rjson::value& descr, const schema& schema, const service::storage_proxy& sp);
|
||||
static void supplement_table_stream_info(rjson::value& descr, const schema& schema, service::storage_proxy& sp);
|
||||
};
|
||||
|
||||
// is_big() checks approximately if the given JSON value is "bigger" than
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
namespace alternator {
|
||||
|
||||
template <typename Func, typename Result = std::result_of_t<Func(expressionsParser&)>>
|
||||
static Result do_with_parser(std::string_view input, Func&& f) {
|
||||
Result do_with_parser(std::string_view input, Func&& f) {
|
||||
expressionsLexer::InputStreamType input_stream{
|
||||
reinterpret_cast<const ANTLR_UINT8*>(input.data()),
|
||||
ANTLR_ENC_UTF8,
|
||||
@@ -43,41 +43,31 @@ static Result do_with_parser(std::string_view input, Func&& f) {
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename Func, typename Result = std::result_of_t<Func(expressionsParser&)>>
|
||||
static Result parse(const char* input_name, std::string_view input, Func&& f) {
|
||||
if (input.length() > 4096) {
|
||||
throw expressions_syntax_error(format("{} expression size {} exceeds allowed maximum 4096.",
|
||||
input_name, input.length()));
|
||||
}
|
||||
try {
|
||||
return do_with_parser(input, f);
|
||||
} catch (expressions_syntax_error& e) {
|
||||
// If already an expressions_syntax_error, don't print the type's
|
||||
// name (it's just ugly), just the message.
|
||||
// TODO: displayRecognitionError could set a position inside the
|
||||
// expressions_syntax_error in throws, and we could use it here to
|
||||
// mark the broken position in 'input'.
|
||||
throw expressions_syntax_error(format("Failed parsing {} '{}': {}",
|
||||
input_name, input, e.what()));
|
||||
} catch (...) {
|
||||
throw expressions_syntax_error(format("Failed parsing {} '{}': {}",
|
||||
input_name, input, std::current_exception()));
|
||||
}
|
||||
}
|
||||
|
||||
parsed::update_expression
|
||||
parse_update_expression(std::string_view query) {
|
||||
return parse("UpdateExpression", query, std::mem_fn(&expressionsParser::update_expression));
|
||||
try {
|
||||
return do_with_parser(query, std::mem_fn(&expressionsParser::update_expression));
|
||||
} catch (...) {
|
||||
throw expressions_syntax_error(format("Failed parsing UpdateExpression '{}': {}", query, std::current_exception()));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<parsed::path>
|
||||
parse_projection_expression(std::string_view query) {
|
||||
return parse ("ProjectionExpression", query, std::mem_fn(&expressionsParser::projection_expression));
|
||||
try {
|
||||
return do_with_parser(query, std::mem_fn(&expressionsParser::projection_expression));
|
||||
} catch (...) {
|
||||
throw expressions_syntax_error(format("Failed parsing ProjectionExpression '{}': {}", query, std::current_exception()));
|
||||
}
|
||||
}
|
||||
|
||||
parsed::condition_expression
|
||||
parse_condition_expression(std::string_view query, const char* caller) {
|
||||
return parse(caller, query, std::mem_fn(&expressionsParser::condition_expression));
|
||||
parse_condition_expression(std::string_view query) {
|
||||
try {
|
||||
return do_with_parser(query, std::mem_fn(&expressionsParser::condition_expression));
|
||||
} catch (...) {
|
||||
throw expressions_syntax_error(format("Failed parsing ConditionExpression '{}': {}", query, std::current_exception()));
|
||||
}
|
||||
}
|
||||
|
||||
namespace parsed {
|
||||
@@ -428,14 +418,9 @@ void for_condition_expression_on(const parsed::condition_expression& ce, const n
|
||||
// calculate_size() is ConditionExpression's size() function, i.e., it takes
|
||||
// a JSON-encoded value and returns its "size" as defined differently for the
|
||||
// different types - also as a JSON-encoded number.
|
||||
// If the value's type (e.g. number) has no size defined, there are two cases:
|
||||
// 1. If from_data (the value came directly from an attribute of the data),
|
||||
// It returns a JSON-encoded "null" value. Comparisons against this
|
||||
// non-numeric value will later fail, so eventually the application will
|
||||
// get a ConditionalCheckFailedException.
|
||||
// 2. Otherwise (the value came from a constant in the query or some other
|
||||
// calculation), throw a ValidationException.
|
||||
static rjson::value calculate_size(const rjson::value& v, bool from_data) {
|
||||
// It returns a JSON-encoded "null" value if this value's type has no size
|
||||
// defined. Comparisons against this non-numeric value will later fail.
|
||||
static rjson::value calculate_size(const rjson::value& v) {
|
||||
// NOTE: If v is improperly formatted for our JSON value encoding, it
|
||||
// must come from the request itself, not from the database, so it makes
|
||||
// sense to throw a ValidationException if we see such a problem.
|
||||
@@ -464,12 +449,10 @@ static rjson::value calculate_size(const rjson::value& v, bool from_data) {
|
||||
throw api_error::validation(format("invalid byte string: {}", v));
|
||||
}
|
||||
ret = base64_decoded_len(rjson::to_string_view(it->value));
|
||||
} else if (from_data) {
|
||||
} else {
|
||||
rjson::value json_ret = rjson::empty_object();
|
||||
rjson::add(json_ret, "null", rjson::value(true));
|
||||
return json_ret;
|
||||
} else {
|
||||
throw api_error::validation(format("Unsupported operand type {} for function size()", it->name));
|
||||
}
|
||||
rjson::value json_ret = rjson::empty_object();
|
||||
rjson::add(json_ret, "N", rjson::from_string(std::to_string(ret)));
|
||||
@@ -551,7 +534,7 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
|
||||
format("{}: size() accepts 1 parameter, got {}", caller, f._parameters.size()));
|
||||
}
|
||||
rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
|
||||
return calculate_size(v, f._parameters[0].is_path());
|
||||
return calculate_size(v);
|
||||
}
|
||||
},
|
||||
{"attribute_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
||||
@@ -679,7 +662,7 @@ static rjson::value extract_path(const rjson::value* item,
|
||||
// objects. But today Alternator does not validate the structure
|
||||
// of nested documents before storing them, so this can happen on
|
||||
// read.
|
||||
throw api_error::validation(format("{}: malformed item read: {}", caller, *item));
|
||||
throw api_error::validation(format("{}: malformed item read: {}", *item));
|
||||
}
|
||||
const char* type = v->MemberBegin()->name.GetString();
|
||||
v = &(v->MemberBegin()->value);
|
||||
|
||||
@@ -74,22 +74,7 @@ options {
|
||||
*/
|
||||
@parser::context {
|
||||
void displayRecognitionError(ANTLR_UINT8** token_names, ExceptionBaseType* ex) {
|
||||
const char* err;
|
||||
switch (ex->getType()) {
|
||||
case antlr3::ExceptionType::FAILED_PREDICATE_EXCEPTION:
|
||||
err = "expression nested too deeply";
|
||||
break;
|
||||
default:
|
||||
err = "syntax error";
|
||||
break;
|
||||
}
|
||||
// Alternator expressions are always single line so ex->get_line()
|
||||
// is always 1, no sense to print it.
|
||||
// TODO: return the position as part of the exception, so the
|
||||
// caller in expressions.cc that knows the expression string can
|
||||
// mark the error position in the final error message.
|
||||
throw expressions_syntax_error(format("{} at char {}", err,
|
||||
ex->get_charPositionInLine()));
|
||||
throw expressions_syntax_error("syntax error");
|
||||
}
|
||||
}
|
||||
@lexer::context {
|
||||
@@ -98,23 +83,6 @@ options {
|
||||
}
|
||||
}
|
||||
|
||||
/* Unfortunately, ANTLR uses recursion - not the heap - to parse recursive
|
||||
* expressions. To make things even worse, ANTLR has no way to limit the
|
||||
* depth of this recursion (unlike Yacc which has YYMAXDEPTH). So deeply-
|
||||
* nested expressions like "(((((((((((((..." can easily crash Scylla on a
|
||||
* stack overflow (see issue #14477).
|
||||
*
|
||||
* We are lucky that in the grammar for DynamoDB expressions (below),
|
||||
* only a few specific rules can recurse, so it was fairly easy to add a
|
||||
* "depth" counter to a few specific rules, and then use a predicate
|
||||
* "{depth<MAX_DEPTH}?" to avoid parsing if the depth exceeds this limit,
|
||||
* and throw a FAILED_PREDICATE_EXCEPTION in that case, which we will
|
||||
* report to the user as an "expression nested too deeply" error.
|
||||
*/
|
||||
@parser::members {
|
||||
static constexpr int MAX_DEPTH = 400;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lexical analysis phase, i.e., splitting the input up to tokens.
|
||||
* Lexical analyzer rules have names starting in capital letters.
|
||||
@@ -187,20 +155,19 @@ path returns [parsed::path p]:
|
||||
| '[' INTEGER ']' { $p.add_index(std::stoi($INTEGER.text)); }
|
||||
)*;
|
||||
|
||||
/* See comment above why the "depth" counter was needed here */
|
||||
value[int depth] returns [parsed::value v]:
|
||||
value returns [parsed::value v]:
|
||||
VALREF { $v.set_valref($VALREF.text); }
|
||||
| path { $v.set_path($path.p); }
|
||||
| {depth<MAX_DEPTH}? NAME { $v.set_func_name($NAME.text); }
|
||||
'(' x=value[depth+1] { $v.add_func_parameter($x.v); }
|
||||
(',' x=value[depth+1] { $v.add_func_parameter($x.v); })*
|
||||
| NAME { $v.set_func_name($NAME.text); }
|
||||
'(' x=value { $v.add_func_parameter($x.v); }
|
||||
(',' x=value { $v.add_func_parameter($x.v); })*
|
||||
')'
|
||||
;
|
||||
|
||||
update_expression_set_rhs returns [parsed::set_rhs rhs]:
|
||||
v=value[0] { $rhs.set_value(std::move($v.v)); }
|
||||
( '+' v=value[0] { $rhs.set_plus(std::move($v.v)); }
|
||||
| '-' v=value[0] { $rhs.set_minus(std::move($v.v)); }
|
||||
v=value { $rhs.set_value(std::move($v.v)); }
|
||||
( '+' v=value { $rhs.set_plus(std::move($v.v)); }
|
||||
| '-' v=value { $rhs.set_minus(std::move($v.v)); }
|
||||
)?
|
||||
;
|
||||
|
||||
@@ -238,7 +205,7 @@ projection_expression returns [std::vector<parsed::path> v]:
|
||||
|
||||
|
||||
primitive_condition returns [parsed::primitive_condition c]:
|
||||
v=value[0] { $c.add_value(std::move($v.v));
|
||||
v=value { $c.add_value(std::move($v.v));
|
||||
$c.set_operator(parsed::primitive_condition::type::VALUE); }
|
||||
( ( '=' { $c.set_operator(parsed::primitive_condition::type::EQ); }
|
||||
| '<' '>' { $c.set_operator(parsed::primitive_condition::type::NE); }
|
||||
@@ -247,14 +214,14 @@ primitive_condition returns [parsed::primitive_condition c]:
|
||||
| '>' { $c.set_operator(parsed::primitive_condition::type::GT); }
|
||||
| '>' '=' { $c.set_operator(parsed::primitive_condition::type::GE); }
|
||||
)
|
||||
v=value[0] { $c.add_value(std::move($v.v)); }
|
||||
v=value { $c.add_value(std::move($v.v)); }
|
||||
| BETWEEN { $c.set_operator(parsed::primitive_condition::type::BETWEEN); }
|
||||
v=value[0] { $c.add_value(std::move($v.v)); }
|
||||
v=value { $c.add_value(std::move($v.v)); }
|
||||
AND
|
||||
v=value[0] { $c.add_value(std::move($v.v)); }
|
||||
v=value { $c.add_value(std::move($v.v)); }
|
||||
| IN '(' { $c.set_operator(parsed::primitive_condition::type::IN); }
|
||||
v=value[0] { $c.add_value(std::move($v.v)); }
|
||||
(',' v=value[0] { $c.add_value(std::move($v.v)); })*
|
||||
v=value { $c.add_value(std::move($v.v)); }
|
||||
(',' v=value { $c.add_value(std::move($v.v)); })*
|
||||
')'
|
||||
)?
|
||||
;
|
||||
@@ -264,20 +231,19 @@ primitive_condition returns [parsed::primitive_condition c]:
|
||||
// common rule prefixes, and (lack of) support for operator precedence.
|
||||
// These rules could have been written more clearly using a more powerful
|
||||
// parser generator - such as Yacc.
|
||||
// See comment above why the "depth" counter was needed here.
|
||||
boolean_expression[int depth] returns [parsed::condition_expression e]:
|
||||
b=boolean_expression_1[depth] { $e.append(std::move($b.e), '|'); }
|
||||
(OR b=boolean_expression_1[depth] { $e.append(std::move($b.e), '|'); } )*
|
||||
boolean_expression returns [parsed::condition_expression e]:
|
||||
b=boolean_expression_1 { $e.append(std::move($b.e), '|'); }
|
||||
(OR b=boolean_expression_1 { $e.append(std::move($b.e), '|'); } )*
|
||||
;
|
||||
boolean_expression_1[int depth] returns [parsed::condition_expression e]:
|
||||
b=boolean_expression_2[depth] { $e.append(std::move($b.e), '&'); }
|
||||
(AND b=boolean_expression_2[depth] { $e.append(std::move($b.e), '&'); } )*
|
||||
boolean_expression_1 returns [parsed::condition_expression e]:
|
||||
b=boolean_expression_2 { $e.append(std::move($b.e), '&'); }
|
||||
(AND b=boolean_expression_2 { $e.append(std::move($b.e), '&'); } )*
|
||||
;
|
||||
boolean_expression_2[int depth] returns [parsed::condition_expression e]:
|
||||
boolean_expression_2 returns [parsed::condition_expression e]:
|
||||
p=primitive_condition { $e.set_primitive(std::move($p.c)); }
|
||||
| {depth<MAX_DEPTH}? NOT b=boolean_expression_2[depth+1] { $e = std::move($b.e); $e.apply_not(); }
|
||||
| {depth<MAX_DEPTH}? '(' b=boolean_expression[depth+1] ')' { $e = std::move($b.e); }
|
||||
| NOT b=boolean_expression_2 { $e = std::move($b.e); $e.apply_not(); }
|
||||
| '(' b=boolean_expression ')' { $e = std::move($b.e); }
|
||||
;
|
||||
|
||||
condition_expression returns [parsed::condition_expression e]:
|
||||
boolean_expression[0] { e=std::move($boolean_expression.e); } EOF;
|
||||
boolean_expression { e=std::move($boolean_expression.e); } EOF;
|
||||
|
||||
@@ -28,7 +28,7 @@ public:
|
||||
|
||||
parsed::update_expression parse_update_expression(std::string_view query);
|
||||
std::vector<parsed::path> parse_projection_expression(std::string_view query);
|
||||
parsed::condition_expression parse_condition_expression(std::string_view query, const char* caller);
|
||||
parsed::condition_expression parse_condition_expression(std::string_view query);
|
||||
|
||||
void resolve_update_expression(parsed::update_expression& ue,
|
||||
const rjson::value* expression_attribute_names,
|
||||
|
||||
@@ -424,7 +424,7 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
||||
co_await client_state.maybe_update_per_service_level_params();
|
||||
|
||||
tracing::trace_state_ptr trace_state = maybe_trace_query(client_state, username, op, content);
|
||||
tracing::trace(trace_state, "{}", op);
|
||||
tracing::trace(trace_state, op);
|
||||
rjson::value json_request = co_await _json_parser.parse(std::move(content));
|
||||
co_return co_await callback_it->second(_executor, client_state, trace_state,
|
||||
make_service_permit(std::move(units)), std::move(json_request), std::move(req));
|
||||
|
||||
@@ -1096,7 +1096,7 @@ void executor::add_stream_options(const rjson::value& stream_specification, sche
|
||||
}
|
||||
}
|
||||
|
||||
void executor::supplement_table_stream_info(rjson::value& descr, const schema& schema, const service::storage_proxy& sp) {
|
||||
void executor::supplement_table_stream_info(rjson::value& descr, const schema& schema, service::storage_proxy& sp) {
|
||||
auto& opts = schema.cdc_options();
|
||||
if (opts.enabled()) {
|
||||
auto db = sp.data_dictionary();
|
||||
|
||||
@@ -430,7 +430,6 @@ class token_ranges_owned_by_this_shard {
|
||||
size_t _range_idx;
|
||||
size_t _end_idx;
|
||||
std::optional<dht::selective_token_range_sharder> _intersecter;
|
||||
locator::effective_replication_map_ptr _erm;
|
||||
public:
|
||||
token_ranges_owned_by_this_shard(replica::database& db, gms::gossiper& g, schema_ptr s)
|
||||
: _s(s)
|
||||
@@ -438,7 +437,6 @@ public:
|
||||
g, utils::fb_utilities::get_broadcast_address())
|
||||
, _range_idx(random_offset(0, _token_ranges.size() - 1))
|
||||
, _end_idx(_range_idx + _token_ranges.size())
|
||||
, _erm(s->table().get_effective_replication_map())
|
||||
{
|
||||
tlogger.debug("Generating token ranges starting from base range {} of {}", _range_idx, _token_ranges.size());
|
||||
}
|
||||
@@ -471,7 +469,7 @@ public:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
_intersecter.emplace(_erm->get_sharder(*_s), _token_ranges[_range_idx % _token_ranges.size()], this_shard_id());
|
||||
_intersecter.emplace(_s->get_sharder(), _token_ranges[_range_idx % _token_ranges.size()], this_shard_id());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@ set(swagger_files
|
||||
api-doc/hinted_handoff.json
|
||||
api-doc/lsa.json
|
||||
api-doc/messaging_service.json
|
||||
api-doc/metrics.json
|
||||
api-doc/storage_proxy.json
|
||||
api-doc/storage_service.json
|
||||
api-doc/stream_manager.json
|
||||
|
||||
@@ -34,14 +34,6 @@
|
||||
"allowMultiple":false,
|
||||
"type":"boolean",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"parameters",
|
||||
"description":"dict of parameters to pass to the injection (json format)",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"dict",
|
||||
"paramType":"body"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -66,30 +58,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/v2/error_injection/injection/{injection}/message",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Send message to trigger an event in injection's code",
|
||||
"type":"void",
|
||||
"nickname":"message_injection",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"injection",
|
||||
"description":"injection name, should correspond to an injection added in code",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/v2/error_injection/injection",
|
||||
"operations":[
|
||||
@@ -118,15 +86,5 @@
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"components":{
|
||||
"schemas": {
|
||||
"dict": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -245,7 +245,7 @@
|
||||
"GOSSIP_SHUTDOWN",
|
||||
"DEFINITIONS_UPDATE",
|
||||
"TRUNCATE",
|
||||
"UNUSED__REPLICATION_FINISHED",
|
||||
"REPLICATION_FINISHED",
|
||||
"MIGRATION_REQUEST",
|
||||
"PREPARE_MESSAGE",
|
||||
"PREPARE_DONE_MESSAGE",
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
"metrics_config": {
|
||||
"id": "metrics_config",
|
||||
"summary": "An entry in the metrics configuration",
|
||||
"properties": {
|
||||
"source_labels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The source labels, a match is based on concatination of the labels"
|
||||
},
|
||||
"action": {
|
||||
"type": "string",
|
||||
"description": "The action to perfrom on match",
|
||||
"enum": ["skip_when_empty", "report_when_empty", "replace", "keep", "drop", "drop_label"]
|
||||
},
|
||||
"target_label": {
|
||||
"type": "string",
|
||||
"description": "The application state version"
|
||||
},
|
||||
"replacement": {
|
||||
"type": "string",
|
||||
"description": "The replacement string to use when replacing a value"
|
||||
},
|
||||
"regex": {
|
||||
"type": "string",
|
||||
"description": "The regex string to use when replacing a value"
|
||||
},
|
||||
"separator": {
|
||||
"type": "string",
|
||||
"description": "The separator string to use when concatinating the labels"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,66 +0,0 @@
|
||||
"/v2/metrics-config/":{
|
||||
"get":{
|
||||
"description":"Return the metrics layer configuration",
|
||||
"operationId":"get_metrics_config",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"tags":[
|
||||
"metrics"
|
||||
],
|
||||
"parameters":[
|
||||
],
|
||||
"responses":{
|
||||
"200":{
|
||||
"schema": {
|
||||
"type":"array",
|
||||
"items":{
|
||||
"$ref":"#/definitions/metrics_config",
|
||||
"description":"metrics Config value"
|
||||
}
|
||||
}
|
||||
},
|
||||
"default":{
|
||||
"description":"unexpected error",
|
||||
"schema":{
|
||||
"$ref":"#/definitions/ErrorModel"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"post": {
|
||||
"description":"Set the metrics layer relabel configuration",
|
||||
"operationId":"set_metrics_config",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"tags":[
|
||||
"metrics"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"in":"body",
|
||||
"name":"conf",
|
||||
"description":"An array of relabel_config objects",
|
||||
"schema": {
|
||||
"type":"array",
|
||||
"items":{
|
||||
"$ref":"#/definitions/metrics_config",
|
||||
"description":"metrics Config value"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses":{
|
||||
"200":{
|
||||
"description": "OK"
|
||||
},
|
||||
"default":{
|
||||
"description":"unexpected error",
|
||||
"schema":{
|
||||
"$ref":"#/definitions/ErrorModel"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -465,7 +465,7 @@
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Retrieve the mapping of endpoint to host ID of all nodes that own tokens",
|
||||
"summary":"Retrieve the mapping of endpoint to host ID",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"mapper"
|
||||
@@ -1114,14 +1114,6 @@
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"ranges_parallelism",
|
||||
"description":"An integer specifying the number of ranges to repair in parallel by user request. If this number is bigger than the max_repair_ranges_in_parallel calculated by Scylla core, the smaller one will be used.",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -1954,7 +1946,7 @@
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Forces this node to recalculate versions of schema objects.",
|
||||
"summary":"Reset local schema",
|
||||
"type":"void",
|
||||
"nickname":"reset_local_schema",
|
||||
"produces":[
|
||||
@@ -2495,23 +2487,7 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/storage_service/raft_topology/reload",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Reload Raft topology state from disk.",
|
||||
"type":"void",
|
||||
"nickname":"reload_raft_topology_state",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"models":{
|
||||
"mapper":{
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
}
|
||||
},
|
||||
"host": "{{Host}}",
|
||||
"basePath": "/",
|
||||
"basePath": "/v2",
|
||||
"schemes": [
|
||||
"http"
|
||||
],
|
||||
|
||||
@@ -1,182 +1,182 @@
|
||||
{
|
||||
"apiVersion":"0.0.1",
|
||||
"swaggerVersion":"1.2",
|
||||
"basePath":"{{Protocol}}://{{Host}}",
|
||||
"resourcePath":"/task_manager",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"apis":[
|
||||
{
|
||||
"path":"/task_manager/list_modules",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get all modules names",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"string"
|
||||
},
|
||||
"nickname":"get_modules",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/list_module_tasks/{module}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get a list of tasks",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"task_stats"
|
||||
},
|
||||
"nickname":"get_tasks",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"module",
|
||||
"description":"The module to query about",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
},
|
||||
{
|
||||
"name":"internal",
|
||||
"description":"Boolean flag indicating whether internal tasks should be shown (false by default)",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"boolean",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"keyspace",
|
||||
"description":"The keyspace to query about",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"table",
|
||||
"description":"The table to query about",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/task_status/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get task status",
|
||||
"type":"task_status",
|
||||
"nickname":"get_task_status",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to query about",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/abort_task/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Abort running task and its descendants",
|
||||
"type":"void",
|
||||
"nickname":"abort_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to abort",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/wait_task/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Wait for a task to complete",
|
||||
"type":"task_status",
|
||||
"nickname":"wait_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to wait for",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/task_status_recursive/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get statuses of the task and all its descendants",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"task_status"
|
||||
},
|
||||
"nickname":"get_task_status_recursively",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to query about",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"apiVersion":"0.0.1",
|
||||
"swaggerVersion":"1.2",
|
||||
"basePath":"{{Protocol}}://{{Host}}",
|
||||
"resourcePath":"/task_manager",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"apis":[
|
||||
{
|
||||
"path":"/task_manager/list_modules",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get all modules names",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"string"
|
||||
},
|
||||
"nickname":"get_modules",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/list_module_tasks/{module}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get a list of tasks",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"task_stats"
|
||||
},
|
||||
"nickname":"get_tasks",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"module",
|
||||
"description":"The module to query about",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
},
|
||||
{
|
||||
"name":"internal",
|
||||
"description":"Boolean flag indicating whether internal tasks should be shown (false by default)",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"boolean",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"keyspace",
|
||||
"description":"The keyspace to query about",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"table",
|
||||
"description":"The table to query about",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/task_status/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get task status",
|
||||
"type":"task_status",
|
||||
"nickname":"get_task_status",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to query about",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/abort_task/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Abort running task and its descendants",
|
||||
"type":"void",
|
||||
"nickname":"abort_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to abort",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/wait_task/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Wait for a task to complete",
|
||||
"type":"task_status",
|
||||
"nickname":"wait_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to wait for",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/task_status_recursive/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get statuses of the task and all its descendants",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"task_status"
|
||||
},
|
||||
"nickname":"get_task_status_recursively",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to query about",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager/ttl",
|
||||
"operations":[
|
||||
{
|
||||
@@ -199,96 +199,88 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"models":{
|
||||
"task_stats" :{
|
||||
"id": "task_stats",
|
||||
"description":"A task statistics object",
|
||||
"properties":{
|
||||
"task_id":{
|
||||
"type":"string",
|
||||
"description":"The uuid of a task"
|
||||
},
|
||||
"state":{
|
||||
"type":"string",
|
||||
"enum":[
|
||||
}
|
||||
],
|
||||
"models":{
|
||||
"task_stats" :{
|
||||
"id": "task_stats",
|
||||
"description":"A task statistics object",
|
||||
"properties":{
|
||||
"task_id":{
|
||||
"type":"string",
|
||||
"description":"The uuid of a task"
|
||||
},
|
||||
"state":{
|
||||
"type":"string",
|
||||
"enum":[
|
||||
"created",
|
||||
"running",
|
||||
"done",
|
||||
"failed"
|
||||
],
|
||||
"description":"The state of a task"
|
||||
},
|
||||
"type":{
|
||||
"type":"string",
|
||||
"description":"The description of the task"
|
||||
},
|
||||
"scope":{
|
||||
"type":"string",
|
||||
"description":"The scope of the task"
|
||||
},
|
||||
"keyspace":{
|
||||
"type":"string",
|
||||
"description":"The keyspace the task is working on (if applicable)"
|
||||
},
|
||||
"table":{
|
||||
"type":"string",
|
||||
"description":"The table the task is working on (if applicable)"
|
||||
},
|
||||
"entity":{
|
||||
"type":"string",
|
||||
"description":"Task-specific entity description"
|
||||
},
|
||||
"sequence_number":{
|
||||
"type":"long",
|
||||
"description":"The running sequence number of the task"
|
||||
}
|
||||
}
|
||||
},
|
||||
"task_status":{
|
||||
"id":"task_status",
|
||||
"description":"A task status object",
|
||||
"properties":{
|
||||
"id":{
|
||||
"type":"string",
|
||||
"description":"The uuid of the task"
|
||||
},
|
||||
"type":{
|
||||
"type":"string",
|
||||
"description":"The description of the task"
|
||||
},
|
||||
"scope":{
|
||||
"type":"string",
|
||||
"description":"The scope of the task"
|
||||
},
|
||||
"state":{
|
||||
],
|
||||
"description":"The state of a task"
|
||||
},
|
||||
"type":{
|
||||
"type":"string",
|
||||
"description":"The description of the task"
|
||||
},
|
||||
"keyspace":{
|
||||
"type":"string",
|
||||
"description":"The keyspace the task is working on (if applicable)"
|
||||
},
|
||||
"table":{
|
||||
"type":"string",
|
||||
"description":"The table the task is working on (if applicable)"
|
||||
},
|
||||
"entity":{
|
||||
"type":"string",
|
||||
"description":"Task-specific entity description"
|
||||
},
|
||||
"sequence_number":{
|
||||
"type":"long",
|
||||
"description":"The running sequence number of the task"
|
||||
}
|
||||
}
|
||||
},
|
||||
"task_status":{
|
||||
"id":"task_status",
|
||||
"description":"A task status object",
|
||||
"properties":{
|
||||
"id":{
|
||||
"type":"string",
|
||||
"description":"The uuid of the task"
|
||||
},
|
||||
"type":{
|
||||
"type":"string",
|
||||
"description":"The description of the task"
|
||||
},
|
||||
"state":{
|
||||
"type":"string",
|
||||
"enum":[
|
||||
"created",
|
||||
"running",
|
||||
"done",
|
||||
"failed"
|
||||
"created",
|
||||
"running",
|
||||
"done",
|
||||
"failed"
|
||||
],
|
||||
"description":"The state of the task"
|
||||
},
|
||||
"is_abortable":{
|
||||
"type":"boolean",
|
||||
"description":"Boolean flag indicating whether the task can be aborted"
|
||||
},
|
||||
"start_time":{
|
||||
"type":"datetime",
|
||||
"description":"The start time of the task"
|
||||
},
|
||||
"end_time":{
|
||||
"type":"datetime",
|
||||
"description":"The end time of the task (unspecified when the task is not completed)"
|
||||
},
|
||||
"error":{
|
||||
"type":"string",
|
||||
"description":"Error string, if the task failed"
|
||||
},
|
||||
"parent_id":{
|
||||
"description":"The state of the task"
|
||||
},
|
||||
"is_abortable":{
|
||||
"type":"boolean",
|
||||
"description":"Boolean flag indicating whether the task can be aborted"
|
||||
},
|
||||
"start_time":{
|
||||
"type":"datetime",
|
||||
"description":"The start time of the task"
|
||||
},
|
||||
"end_time":{
|
||||
"type":"datetime",
|
||||
"description":"The end time of the task (unspecified when the task is not completed)"
|
||||
},
|
||||
"error":{
|
||||
"type":"string",
|
||||
"description":"Error string, if the task failed"
|
||||
},
|
||||
"parent_id":{
|
||||
"type":"string",
|
||||
"description":"The uuid of the parent task"
|
||||
},
|
||||
@@ -326,12 +318,12 @@
|
||||
},
|
||||
"children_ids":{
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"string"
|
||||
},
|
||||
"items":{
|
||||
"type":"string"
|
||||
},
|
||||
"description":"Task IDs of children of this task"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,153 +1,153 @@
|
||||
{
|
||||
"apiVersion":"0.0.1",
|
||||
"swaggerVersion":"1.2",
|
||||
"basePath":"{{Protocol}}://{{Host}}",
|
||||
"resourcePath":"/task_manager_test",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"apis":[
|
||||
{
|
||||
"path":"/task_manager_test/test_module",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Register test module in task manager",
|
||||
"type":"void",
|
||||
"nickname":"register_test_module",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
]
|
||||
},
|
||||
{
|
||||
"method":"DELETE",
|
||||
"summary":"Unregister test module in task manager",
|
||||
"type":"void",
|
||||
"nickname":"unregister_test_module",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager_test/test_task",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Register test task",
|
||||
"type":"string",
|
||||
"nickname":"register_test_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to register",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"shard",
|
||||
"description":"The shard of the task",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"long",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"parent_id",
|
||||
"description":"The uuid of a parent task",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"keyspace",
|
||||
"description":"The keyspace the task is working on",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"table",
|
||||
"description":"The table the task is working on",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"entity",
|
||||
"description":"Task-specific entity description",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"method":"DELETE",
|
||||
"summary":"Unregister test task",
|
||||
"type":"void",
|
||||
"nickname":"unregister_test_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to register",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager_test/finish_test_task/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Finish test task",
|
||||
"type":"void",
|
||||
"nickname":"finish_test_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to finish",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
},
|
||||
{
|
||||
"name":"error",
|
||||
"description":"The error with which task fails (if it does)",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
"apiVersion":"0.0.1",
|
||||
"swaggerVersion":"1.2",
|
||||
"basePath":"{{Protocol}}://{{Host}}",
|
||||
"resourcePath":"/task_manager_test",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"apis":[
|
||||
{
|
||||
"path":"/task_manager_test/test_module",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Register test module in task manager",
|
||||
"type":"void",
|
||||
"nickname":"register_test_module",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
]
|
||||
},
|
||||
{
|
||||
"method":"DELETE",
|
||||
"summary":"Unregister test module in task manager",
|
||||
"type":"void",
|
||||
"nickname":"unregister_test_module",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager_test/test_task",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Register test task",
|
||||
"type":"string",
|
||||
"nickname":"register_test_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to register",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"shard",
|
||||
"description":"The shard of the task",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"long",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"parent_id",
|
||||
"description":"The uuid of a parent task",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"keyspace",
|
||||
"description":"The keyspace the task is working on",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"table",
|
||||
"description":"The table the task is working on",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"entity",
|
||||
"description":"Task-specific entity description",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"method":"DELETE",
|
||||
"summary":"Unregister test task",
|
||||
"type":"void",
|
||||
"nickname":"unregister_test_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to register",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/task_manager_test/finish_test_task/{task_id}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Finish test task",
|
||||
"type":"void",
|
||||
"nickname":"finish_test_task",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"task_id",
|
||||
"description":"The uuid of a task to finish",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
},
|
||||
{
|
||||
"name":"error",
|
||||
"description":"The error with which task fails (if it does)",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
46
api/api.cc
46
api/api.cc
@@ -60,10 +60,8 @@ future<> set_server_init(http_context& ctx) {
|
||||
rb->set_api_doc(r);
|
||||
rb02->set_api_doc(r);
|
||||
rb02->register_api_file(r, "swagger20_header");
|
||||
rb02->register_api_file(r, "metrics");
|
||||
rb->register_function(r, "system",
|
||||
"The system related API");
|
||||
rb02->add_definitions_file(r, "metrics");
|
||||
set_system(ctx, r);
|
||||
});
|
||||
}
|
||||
@@ -71,7 +69,7 @@ future<> set_server_init(http_context& ctx) {
|
||||
future<> set_server_config(http_context& ctx, const db::config& cfg) {
|
||||
auto rb02 = std::make_shared < api_registry_builder20 > (ctx.api_doc, "/v2");
|
||||
return ctx.http_server.set_routes([&ctx, &cfg, rb02](routes& r) {
|
||||
set_config(rb02, ctx, r, cfg, false);
|
||||
set_config(rb02, ctx, r, cfg);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -102,16 +100,12 @@ future<> unset_rpc_controller(http_context& ctx) {
|
||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_rpc_controller(ctx, r); });
|
||||
}
|
||||
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
|
||||
return register_api(ctx, "storage_service", "The storage service API", [&ss, &group0_client] (http_context& ctx, routes& r) {
|
||||
set_storage_service(ctx, r, ss, group0_client);
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<gms::gossiper>& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks) {
|
||||
return register_api(ctx, "storage_service", "The storage service API", [&ss, &g, &cdc_gs, &sys_ks] (http_context& ctx, routes& r) {
|
||||
set_storage_service(ctx, r, ss, g.local(), cdc_gs, sys_ks);
|
||||
});
|
||||
}
|
||||
|
||||
future<> unset_server_storage_service(http_context& ctx) {
|
||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_storage_service(ctx, r); });
|
||||
}
|
||||
|
||||
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader) {
|
||||
return ctx.http_server.set_routes([&ctx, &sst_loader] (routes& r) { set_sstables_loader(ctx, r, sst_loader); });
|
||||
}
|
||||
@@ -193,10 +187,10 @@ future<> unset_server_messaging_service(http_context& ctx) {
|
||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_messaging_service(ctx, r); });
|
||||
}
|
||||
|
||||
future<> set_server_storage_proxy(http_context& ctx, sharded<service::storage_proxy>& proxy) {
|
||||
future<> set_server_storage_proxy(http_context& ctx, sharded<service::storage_service>& ss) {
|
||||
return register_api(ctx, "storage_proxy",
|
||||
"The storage proxy API", [&proxy] (http_context& ctx, routes& r) {
|
||||
set_storage_proxy(ctx, r, proxy);
|
||||
"The storage proxy API", [&ss] (http_context& ctx, routes& r) {
|
||||
set_storage_proxy(ctx, r, ss);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -220,10 +214,10 @@ future<> set_server_cache(http_context& ctx) {
|
||||
"The cache service API", set_cache_service);
|
||||
}
|
||||
|
||||
future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& proxy) {
|
||||
future<> set_hinted_handoff(http_context& ctx, sharded<gms::gossiper>& g) {
|
||||
return register_api(ctx, "hinted_handoff",
|
||||
"The hinted handoff API", [&proxy] (http_context& ctx, routes& r) {
|
||||
set_hinted_handoff(ctx, r, proxy);
|
||||
"The hinted handoff API", [&g] (http_context& ctx, routes& r) {
|
||||
set_hinted_handoff(ctx, r, g.local());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -270,36 +264,28 @@ future<> set_server_done(http_context& ctx) {
|
||||
});
|
||||
}
|
||||
|
||||
future<> set_server_task_manager(http_context& ctx, sharded<tasks::task_manager>& tm, lw_shared_ptr<db::config> cfg) {
|
||||
future<> set_server_task_manager(http_context& ctx, lw_shared_ptr<db::config> cfg) {
|
||||
auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);
|
||||
|
||||
return ctx.http_server.set_routes([rb, &ctx, &tm, &cfg = *cfg](routes& r) {
|
||||
return ctx.http_server.set_routes([rb, &ctx, &cfg = *cfg](routes& r) {
|
||||
rb->register_function(r, "task_manager",
|
||||
"The task manager API");
|
||||
set_task_manager(ctx, r, tm, cfg);
|
||||
set_task_manager(ctx, r, cfg);
|
||||
});
|
||||
}
|
||||
|
||||
future<> unset_server_task_manager(http_context& ctx) {
|
||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_task_manager(ctx, r); });
|
||||
}
|
||||
|
||||
#ifndef SCYLLA_BUILD_MODE_RELEASE
|
||||
|
||||
future<> set_server_task_manager_test(http_context& ctx, sharded<tasks::task_manager>& tm) {
|
||||
future<> set_server_task_manager_test(http_context& ctx) {
|
||||
auto rb = std::make_shared < api_registry_builder > (ctx.api_doc);
|
||||
|
||||
return ctx.http_server.set_routes([rb, &ctx, &tm](routes& r) mutable {
|
||||
return ctx.http_server.set_routes([rb, &ctx](routes& r) mutable {
|
||||
rb->register_function(r, "task_manager_test",
|
||||
"The task manager test API");
|
||||
set_task_manager_test(ctx, r, tm);
|
||||
set_task_manager_test(ctx, r);
|
||||
});
|
||||
}
|
||||
|
||||
future<> unset_server_task_manager_test(http_context& ctx) {
|
||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_task_manager_test(ctx, r); });
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void req_params::process(const request& req) {
|
||||
|
||||
@@ -22,7 +22,6 @@ namespace service {
|
||||
class load_meter;
|
||||
class storage_proxy;
|
||||
class storage_service;
|
||||
class raft_group0_client;
|
||||
|
||||
} // namespace service
|
||||
|
||||
@@ -52,6 +51,7 @@ class system_keyspace;
|
||||
}
|
||||
namespace netw { class messaging_service; }
|
||||
class repair_service;
|
||||
namespace cdc { class generation_service; }
|
||||
|
||||
namespace gms {
|
||||
|
||||
@@ -61,10 +61,6 @@ class gossiper;
|
||||
|
||||
namespace auth { class service; }
|
||||
|
||||
namespace tasks {
|
||||
class task_manager;
|
||||
}
|
||||
|
||||
namespace api {
|
||||
|
||||
struct http_context {
|
||||
@@ -72,12 +68,15 @@ struct http_context {
|
||||
sstring api_doc;
|
||||
httpd::http_server_control http_server;
|
||||
distributed<replica::database>& db;
|
||||
distributed<service::storage_proxy>& sp;
|
||||
service::load_meter& lmeter;
|
||||
const sharded<locator::shared_token_metadata>& shared_token_metadata;
|
||||
sharded<tasks::task_manager>& tm;
|
||||
|
||||
http_context(distributed<replica::database>& _db,
|
||||
service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm)
|
||||
: db(_db), lmeter(_lm), shared_token_metadata(_stm) {
|
||||
distributed<service::storage_proxy>& _sp,
|
||||
service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm, sharded<tasks::task_manager>& _tm)
|
||||
: db(_db), sp(_sp), lmeter(_lm), shared_token_metadata(_stm), tm(_tm) {
|
||||
}
|
||||
|
||||
const locator::token_metadata& get_token_metadata();
|
||||
@@ -87,8 +86,7 @@ future<> set_server_init(http_context& ctx);
|
||||
future<> set_server_config(http_context& ctx, const db::config& cfg);
|
||||
future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snitch);
|
||||
future<> unset_server_snitch(http_context& ctx);
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
|
||||
future<> unset_server_storage_service(http_context& ctx);
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<gms::gossiper>& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks);
|
||||
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
|
||||
future<> unset_server_sstables_loader(http_context& ctx);
|
||||
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb);
|
||||
@@ -108,19 +106,17 @@ future<> set_server_load_sstable(http_context& ctx, sharded<db::system_keyspace>
|
||||
future<> unset_server_load_sstable(http_context& ctx);
|
||||
future<> set_server_messaging_service(http_context& ctx, sharded<netw::messaging_service>& ms);
|
||||
future<> unset_server_messaging_service(http_context& ctx);
|
||||
future<> set_server_storage_proxy(http_context& ctx, sharded<service::storage_proxy>& proxy);
|
||||
future<> set_server_storage_proxy(http_context& ctx, sharded<service::storage_service>& ss);
|
||||
future<> unset_server_storage_proxy(http_context& ctx);
|
||||
future<> set_server_stream_manager(http_context& ctx, sharded<streaming::stream_manager>& sm);
|
||||
future<> unset_server_stream_manager(http_context& ctx);
|
||||
future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& p);
|
||||
future<> set_hinted_handoff(http_context& ctx, sharded<gms::gossiper>& g);
|
||||
future<> unset_hinted_handoff(http_context& ctx);
|
||||
future<> set_server_gossip_settle(http_context& ctx, sharded<gms::gossiper>& g);
|
||||
future<> set_server_cache(http_context& ctx);
|
||||
future<> set_server_compaction_manager(http_context& ctx);
|
||||
future<> set_server_done(http_context& ctx);
|
||||
future<> set_server_task_manager(http_context& ctx, sharded<tasks::task_manager>& tm, lw_shared_ptr<db::config> cfg);
|
||||
future<> unset_server_task_manager(http_context& ctx);
|
||||
future<> set_server_task_manager_test(http_context& ctx, sharded<tasks::task_manager>& tm);
|
||||
future<> unset_server_task_manager_test(http_context& ctx);
|
||||
future<> set_server_task_manager(http_context& ctx, lw_shared_ptr<db::config> cfg);
|
||||
future<> set_server_task_manager_test(http_context& ctx);
|
||||
|
||||
}
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
#include "api/authorization_cache.hh"
|
||||
#include "api/api.hh"
|
||||
#include "auth/common.hh"
|
||||
#include "auth/service.hh"
|
||||
|
||||
namespace api {
|
||||
using namespace json;
|
||||
|
||||
@@ -43,7 +43,7 @@ std::tuple<sstring, sstring> parse_fully_qualified_cf_name(sstring name) {
|
||||
return std::make_tuple(name.substr(0, pos), name.substr(end));
|
||||
}
|
||||
|
||||
table_id get_uuid(const sstring& ks, const sstring& cf, const replica::database& db) {
|
||||
const table_id& get_uuid(const sstring& ks, const sstring& cf, const replica::database& db) {
|
||||
try {
|
||||
return db.find_uuid(ks, cf);
|
||||
} catch (replica::no_such_column_family& e) {
|
||||
@@ -51,7 +51,7 @@ table_id get_uuid(const sstring& ks, const sstring& cf, const replica::database&
|
||||
}
|
||||
}
|
||||
|
||||
table_id get_uuid(const sstring& name, const replica::database& db) {
|
||||
const table_id& get_uuid(const sstring& name, const replica::database& db) {
|
||||
auto [ks, cf] = parse_fully_qualified_cf_name(name);
|
||||
return get_uuid(ks, cf, db);
|
||||
}
|
||||
@@ -135,9 +135,9 @@ static future<json::json_return_type> get_cf_histogram(http_context& ctx, const
|
||||
static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||
std::function<utils::ihistogram(const replica::database&)> fun = [f] (const replica::database& db) {
|
||||
utils::ihistogram res;
|
||||
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) mutable {
|
||||
res += (table->get_stats().*f).hist;
|
||||
});
|
||||
for (auto i : db.get_column_families()) {
|
||||
res += (i.second->get_stats().*f).hist;
|
||||
}
|
||||
return res;
|
||||
};
|
||||
return ctx.db.map(fun).then([](const std::vector<utils::ihistogram> &res) {
|
||||
@@ -162,9 +162,9 @@ static future<json::json_return_type> get_cf_rate_and_histogram(http_context& c
|
||||
static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||
std::function<utils::rate_moving_average_and_histogram(const replica::database&)> fun = [f] (const replica::database& db) {
|
||||
utils::rate_moving_average_and_histogram res;
|
||||
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) {
|
||||
res += (table->get_stats().*f).rate();
|
||||
});
|
||||
for (auto i : db.get_column_families()) {
|
||||
res += (i.second->get_stats().*f).rate();
|
||||
}
|
||||
return res;
|
||||
};
|
||||
return ctx.db.map(fun).then([](const std::vector<utils::rate_moving_average_and_histogram> &res) {
|
||||
@@ -306,21 +306,21 @@ ratio_holder filter_recent_false_positive_as_ratio_holder(const sstables::shared
|
||||
void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace>& sys_ks) {
|
||||
cf::get_column_family_name.set(r, [&ctx] (const_req req){
|
||||
std::vector<sstring> res;
|
||||
ctx.db.local().get_tables_metadata().for_each_table_id([&] (const std::pair<sstring, sstring>& kscf, table_id) {
|
||||
res.push_back(kscf.first + ":" + kscf.second);
|
||||
});
|
||||
for (auto i: ctx.db.local().get_column_families_mapping()) {
|
||||
res.push_back(i.first.first + ":" + i.first.second);
|
||||
}
|
||||
return res;
|
||||
});
|
||||
|
||||
cf::get_column_family.set(r, [&ctx] (std::unique_ptr<http::request> req){
|
||||
std::list<cf::column_family_info> res;
|
||||
ctx.db.local().get_tables_metadata().for_each_table_id([&] (const std::pair<sstring, sstring>& kscf, table_id) {
|
||||
std::list<cf::column_family_info> res;
|
||||
for (auto i: ctx.db.local().get_column_families_mapping()) {
|
||||
cf::column_family_info info;
|
||||
info.ks = kscf.first;
|
||||
info.cf = kscf.second;
|
||||
info.ks = i.first.first;
|
||||
info.cf = i.first.second;
|
||||
info.type = "ColumnFamilies";
|
||||
res.push_back(info);
|
||||
});
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(json::stream_range_as_array(std::move(res), std::identity()));
|
||||
});
|
||||
|
||||
@@ -1017,12 +1017,11 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
auto key = req->get_query_param("key");
|
||||
auto uuid = get_uuid(req->param["name"], ctx.db.local());
|
||||
|
||||
return ctx.db.map_reduce0([key, uuid] (replica::database& db) -> future<std::unordered_set<sstring>> {
|
||||
auto sstables = co_await db.find_column_family(uuid).get_sstables_by_partition_key(key);
|
||||
co_return boost::copy_range<std::unordered_set<sstring>>(sstables | boost::adaptors::transformed([] (auto s) { return s->get_filename(); }));
|
||||
return ctx.db.map_reduce0([key, uuid] (replica::database& db) {
|
||||
return db.find_column_family(uuid).get_sstables_by_partition_key(key);
|
||||
}, std::unordered_set<sstring>(),
|
||||
[](std::unordered_set<sstring> a, std::unordered_set<sstring>&& b) mutable {
|
||||
a.merge(b);
|
||||
[](std::unordered_set<sstring> a, std::unordered_set<sstring>&& b) mutable {
|
||||
a.insert(b.begin(),b.end());
|
||||
return a;
|
||||
}).then([](const std::unordered_set<sstring>& res) {
|
||||
return make_ready_future<json::json_return_type>(container_to_vec(res));
|
||||
@@ -1054,10 +1053,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
apilog.info("column_family/force_major_compaction: name={}", req->param["name"]);
|
||||
auto [ks, cf] = parse_fully_qualified_cf_name(req->param["name"]);
|
||||
auto keyspace = validate_keyspace(ctx, ks);
|
||||
std::vector<table_info> table_infos = {table_info{
|
||||
.name = cf,
|
||||
.id = ctx.db.local().find_uuid(ks, cf)
|
||||
}};
|
||||
std::vector<table_id> table_infos = {ctx.db.local().find_uuid(ks, cf)};
|
||||
|
||||
auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
|
||||
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, std::move(table_infos));
|
||||
|
||||
@@ -23,7 +23,7 @@ namespace api {
|
||||
void set_column_family(http_context& ctx, httpd::routes& r, sharded<db::system_keyspace>& sys_ks);
|
||||
void unset_column_family(http_context& ctx, httpd::routes& r);
|
||||
|
||||
table_id get_uuid(const sstring& name, const replica::database& db);
|
||||
const table_id& get_uuid(const sstring& name, const replica::database& db);
|
||||
future<> foreach_column_family(http_context& ctx, const sstring& name, std::function<void(replica::column_family&)> f);
|
||||
|
||||
|
||||
@@ -68,10 +68,9 @@ struct map_reduce_column_families_locally {
|
||||
std::function<std::unique_ptr<std::any>(std::unique_ptr<std::any>, std::unique_ptr<std::any>)> reducer;
|
||||
future<std::unique_ptr<std::any>> operator()(replica::database& db) const {
|
||||
auto res = seastar::make_lw_shared<std::unique_ptr<std::any>>(std::make_unique<std::any>(init));
|
||||
return db.get_tables_metadata().for_each_table_gently([res, this] (table_id, seastar::lw_shared_ptr<replica::table> table) {
|
||||
*res = reducer(std::move(*res), mapper(*table.get()));
|
||||
return make_ready_future();
|
||||
}).then([res] () {
|
||||
return do_for_each(db.get_column_families(), [res, this](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) {
|
||||
*res = reducer(std::move(*res), mapper(*i.second.get()));
|
||||
}).then([res] {
|
||||
return std::move(*res);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -68,8 +68,8 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
||||
cm::get_pending_tasks_by_table.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return ctx.db.map_reduce0([](replica::database& db) {
|
||||
return do_with(std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>(), [&db](std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>& tasks) {
|
||||
return db.get_tables_metadata().for_each_table_gently([&tasks] (table_id, lw_shared_ptr<replica::table> table) {
|
||||
replica::table& cf = *table.get();
|
||||
return do_for_each(db.get_column_families(), [&tasks](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) -> future<> {
|
||||
replica::table& cf = *i.second.get();
|
||||
tasks[std::make_pair(cf.schema()->ks_name(), cf.schema()->cf_name())] = cf.estimate_pending_compactions();
|
||||
return make_ready_future<>();
|
||||
}).then([&tasks] {
|
||||
|
||||
@@ -45,7 +45,7 @@ future<> get_config_swagger_entry(std::string_view name, const std::string& desc
|
||||
} else {
|
||||
ss <<',';
|
||||
};
|
||||
ss << "\"/v2/config/" << name <<"\": {"
|
||||
ss << "\"/config/" << name <<"\": {"
|
||||
"\"get\": {"
|
||||
"\"description\": \"" << boost::replace_all_copy(boost::replace_all_copy(boost::replace_all_copy(description,"\n","\\n"),"\"", "''"), "\t", " ") <<"\","
|
||||
"\"operationId\": \"find_config_"<< name <<"\","
|
||||
@@ -76,9 +76,9 @@ future<> get_config_swagger_entry(std::string_view name, const std::string& desc
|
||||
|
||||
namespace cs = httpd::config_json;
|
||||
|
||||
void set_config(std::shared_ptr < api_registry_builder20 > rb, http_context& ctx, routes& r, const db::config& cfg, bool first) {
|
||||
rb->register_function(r, [&cfg, first] (output_stream<char>& os) {
|
||||
return do_with(first, [&os, &cfg] (bool& first) {
|
||||
void set_config(std::shared_ptr < api_registry_builder20 > rb, http_context& ctx, routes& r, const db::config& cfg) {
|
||||
rb->register_function(r, [&cfg] (output_stream<char>& os) {
|
||||
return do_with(true, [&os, &cfg] (bool& first) {
|
||||
auto f = make_ready_future();
|
||||
for (auto&& cfg_ref : cfg.values()) {
|
||||
auto&& cfg = cfg_ref.get();
|
||||
|
||||
@@ -13,5 +13,5 @@
|
||||
|
||||
namespace api {
|
||||
|
||||
void set_config(std::shared_ptr<httpd::api_registry_builder20> rb, http_context& ctx, httpd::routes& r, const db::config& cfg, bool first = false);
|
||||
void set_config(std::shared_ptr<httpd::api_registry_builder20> rb, http_context& ctx, httpd::routes& r, const db::config& cfg);
|
||||
}
|
||||
|
||||
@@ -12,9 +12,7 @@
|
||||
#include <seastar/http/exception.hh>
|
||||
#include "log.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "utils/rjson.hh"
|
||||
#include <seastar/core/future-util.hh>
|
||||
#include <seastar/util/short_streams.hh>
|
||||
|
||||
namespace api {
|
||||
using namespace seastar::httpd;
|
||||
@@ -26,27 +24,10 @@ void set_error_injection(http_context& ctx, routes& r) {
|
||||
hf::enable_injection.set(r, [](std::unique_ptr<request> req) {
|
||||
sstring injection = req->param["injection"];
|
||||
bool one_shot = req->get_query_param("one_shot") == "True";
|
||||
auto params = req->content;
|
||||
|
||||
const size_t max_params_size = 1024 * 1024;
|
||||
if (params.size() > max_params_size) {
|
||||
// This is a hard limit, because we don't want to allocate
|
||||
// too much memory or block the thread for too long.
|
||||
throw httpd::bad_param_exception(format("Injection parameters are too long, max length is {}", max_params_size));
|
||||
}
|
||||
|
||||
try {
|
||||
auto parameters = params.empty()
|
||||
? utils::error_injection_parameters{}
|
||||
: rjson::parse_to_map<utils::error_injection_parameters>(params);
|
||||
|
||||
auto& errinj = utils::get_local_injector();
|
||||
return errinj.enable_on_all(injection, one_shot, std::move(parameters)).then([] {
|
||||
return make_ready_future<json::json_return_type>(json::json_void());
|
||||
});
|
||||
} catch (const rjson::error& e) {
|
||||
throw httpd::bad_param_exception(format("Failed to parse injections parameters: {}", e.what()));
|
||||
}
|
||||
auto& errinj = utils::get_local_injector();
|
||||
return errinj.enable_on_all(injection, one_shot).then([] {
|
||||
return make_ready_future<json::json_return_type>(json::json_void());
|
||||
});
|
||||
});
|
||||
|
||||
hf::get_enabled_injections_on_all.set(r, [](std::unique_ptr<request> req) {
|
||||
@@ -71,13 +52,6 @@ void set_error_injection(http_context& ctx, routes& r) {
|
||||
});
|
||||
});
|
||||
|
||||
hf::message_injection.set(r, [](std::unique_ptr<request> req) {
|
||||
sstring injection = req->param["injection"];
|
||||
auto& errinj = utils::get_local_injector();
|
||||
return errinj.receive_message_on_all(injection).then([] {
|
||||
return make_ready_future<json::json_return_type>(json::json_void());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace api
|
||||
|
||||
@@ -19,25 +19,24 @@ namespace fd = httpd::failure_detector_json;
|
||||
void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
fd::get_all_endpoint_states.set(r, [&g](std::unique_ptr<request> req) {
|
||||
std::vector<fd::endpoint_state> res;
|
||||
res.reserve(g.num_endpoints());
|
||||
g.for_each_endpoint_state([&] (const gms::inet_address& addr, const gms::endpoint_state& eps) {
|
||||
for (auto i : g.get_endpoint_states()) {
|
||||
fd::endpoint_state val;
|
||||
val.addrs = fmt::to_string(addr);
|
||||
val.is_alive = g.is_alive(addr);
|
||||
val.generation = eps.get_heart_beat_state().get_generation().value();
|
||||
val.version = eps.get_heart_beat_state().get_heart_beat_version().value();
|
||||
val.update_time = eps.get_update_timestamp().time_since_epoch().count();
|
||||
for (const auto& [as_type, app_state] : eps.get_application_state_map()) {
|
||||
val.addrs = fmt::to_string(i.first);
|
||||
val.is_alive = i.second.is_alive();
|
||||
val.generation = i.second.get_heart_beat_state().get_generation().value();
|
||||
val.version = i.second.get_heart_beat_state().get_heart_beat_version().value();
|
||||
val.update_time = i.second.get_update_timestamp().time_since_epoch().count();
|
||||
for (auto a : i.second.get_application_state_map()) {
|
||||
fd::version_value version_val;
|
||||
// We return the enum index and not it's name to stay compatible to origin
|
||||
// method that the state index are static but the name can be changed.
|
||||
version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(as_type);
|
||||
version_val.value = app_state.value();
|
||||
version_val.version = app_state.version().value();
|
||||
version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(a.first);
|
||||
version_val.value = a.second.value();
|
||||
version_val.version = a.second.version().value();
|
||||
val.application_state.push(version_val);
|
||||
}
|
||||
res.emplace_back(std::move(val));
|
||||
});
|
||||
res.push_back(val);
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
@@ -57,9 +56,9 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
|
||||
fd::get_simple_states.set(r, [&g] (std::unique_ptr<request> req) {
|
||||
std::map<sstring, sstring> nodes_status;
|
||||
g.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state&) {
|
||||
nodes_status.emplace(node.to_sstring(), g.is_alive(node) ? "UP" : "DOWN");
|
||||
});
|
||||
for (auto& entry : g.get_endpoint_states()) {
|
||||
nodes_status.emplace(entry.first.to_sstring(), entry.second.is_alive() ? "UP" : "DOWN");
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
|
||||
});
|
||||
|
||||
@@ -71,7 +70,7 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
});
|
||||
|
||||
fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
|
||||
auto state = g.get_endpoint_state_ptr(gms::inet_address(req->param["addr"]));
|
||||
auto* state = g.get_endpoint_state_for_endpoint_ptr(gms::inet_address(req->param["addr"]));
|
||||
if (!state) {
|
||||
return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
|
||||
}
|
||||
|
||||
@@ -6,11 +6,8 @@
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
#include <seastar/core/coroutine.hh>
|
||||
|
||||
#include "gossiper.hh"
|
||||
#include "api/api-doc/gossiper.json.hh"
|
||||
#include "gms/endpoint_state.hh"
|
||||
#include "gms/gossiper.hh"
|
||||
|
||||
namespace api {
|
||||
@@ -18,9 +15,9 @@ using namespace seastar::httpd;
|
||||
using namespace json;
|
||||
|
||||
void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
httpd::gossiper_json::get_down_endpoint.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
|
||||
auto res = co_await g.get_unreachable_members_synchronized();
|
||||
co_return json::json_return_type(container_to_vec(res));
|
||||
httpd::gossiper_json::get_down_endpoint.set(r, [&g] (const_req req) {
|
||||
auto res = g.get_unreachable_members();
|
||||
return container_to_vec(res);
|
||||
});
|
||||
|
||||
|
||||
@@ -30,11 +27,9 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
});
|
||||
});
|
||||
|
||||
httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
|
||||
gms::inet_address ep(req->param["addr"]);
|
||||
// synchronize unreachable_members on all shards
|
||||
co_await g.get_unreachable_members_synchronized();
|
||||
co_return g.get_endpoint_downtime(ep);
|
||||
httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (const_req req) {
|
||||
gms::inet_address ep(req.param["addr"]);
|
||||
return g.get_endpoint_downtime(ep);
|
||||
});
|
||||
|
||||
httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<http::request> req) {
|
||||
@@ -64,7 +59,7 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
|
||||
httpd::gossiper_json::force_remove_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
|
||||
gms::inet_address ep(req->param["addr"]);
|
||||
return g.force_remove_endpoint(ep, gms::null_permit_id).then([] {
|
||||
return g.force_remove_endpoint(ep).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "api/api-doc/hinted_handoff.json.hh"
|
||||
|
||||
#include "gms/inet_address.hh"
|
||||
#include "gms/gossiper.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
|
||||
namespace api {
|
||||
@@ -21,33 +22,38 @@ using namespace json;
|
||||
using namespace seastar::httpd;
|
||||
namespace hh = httpd::hinted_handoff_json;
|
||||
|
||||
void set_hinted_handoff(http_context& ctx, routes& r, sharded<service::storage_proxy>& proxy) {
|
||||
hh::create_hints_sync_point.set(r, [&proxy] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto parse_hosts_list = [] (sstring arg) {
|
||||
void set_hinted_handoff(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
hh::create_hints_sync_point.set(r, [&ctx, &g] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto parse_hosts_list = [&g] (sstring arg) {
|
||||
std::vector<sstring> hosts_str = split(arg, ",");
|
||||
std::vector<gms::inet_address> hosts;
|
||||
hosts.reserve(hosts_str.size());
|
||||
|
||||
for (const auto& host_str : hosts_str) {
|
||||
try {
|
||||
gms::inet_address host;
|
||||
host = gms::inet_address(host_str);
|
||||
hosts.push_back(host);
|
||||
} catch (std::exception& e) {
|
||||
throw httpd::bad_param_exception(format("Failed to parse host address {}: {}", host_str, e.what()));
|
||||
if (hosts_str.empty()) {
|
||||
// No target_hosts specified means that we should wait for hints for all nodes to be sent
|
||||
const auto members_set = g.get_live_members();
|
||||
std::copy(members_set.begin(), members_set.end(), std::back_inserter(hosts));
|
||||
} else {
|
||||
for (const auto& host_str : hosts_str) {
|
||||
try {
|
||||
gms::inet_address host;
|
||||
host = gms::inet_address(host_str);
|
||||
hosts.push_back(host);
|
||||
} catch (std::exception& e) {
|
||||
throw httpd::bad_param_exception(format("Failed to parse host address {}: {}", host_str, e.what()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return hosts;
|
||||
};
|
||||
|
||||
std::vector<gms::inet_address> target_hosts = parse_hosts_list(req->get_query_param("target_hosts"));
|
||||
return proxy.local().create_hint_sync_point(std::move(target_hosts)).then([] (db::hints::sync_point sync_point) {
|
||||
return ctx.sp.local().create_hint_sync_point(std::move(target_hosts)).then([] (db::hints::sync_point sync_point) {
|
||||
return json::json_return_type(sync_point.encode());
|
||||
});
|
||||
});
|
||||
|
||||
hh::get_hints_sync_point.set(r, [&proxy] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
hh::get_hints_sync_point.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
db::hints::sync_point sync_point;
|
||||
const sstring encoded = req->get_query_param("id");
|
||||
try {
|
||||
@@ -81,7 +87,7 @@ void set_hinted_handoff(http_context& ctx, routes& r, sharded<service::storage_p
|
||||
using return_type = hh::ns_get_hints_sync_point::get_hints_sync_point_return_type;
|
||||
using return_type_wrapper = hh::ns_get_hints_sync_point::return_type_wrapper;
|
||||
|
||||
return proxy.local().wait_for_hint_sync_point(std::move(sync_point), deadline).then([] {
|
||||
return ctx.sp.local().wait_for_hint_sync_point(std::move(sync_point), deadline).then([] {
|
||||
return json::json_return_type(return_type_wrapper(return_type::DONE));
|
||||
}).handle_exception_type([] (const timed_out_error&) {
|
||||
return json::json_return_type(return_type_wrapper(return_type::IN_PROGRESS));
|
||||
|
||||
@@ -8,14 +8,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include "api.hh"
|
||||
|
||||
namespace service { class storage_proxy; }
|
||||
namespace gms {
|
||||
|
||||
class gossiper;
|
||||
|
||||
}
|
||||
|
||||
namespace api {
|
||||
|
||||
void set_hinted_handoff(http_context& ctx, httpd::routes& r, sharded<service::storage_proxy>& p);
|
||||
void set_hinted_handoff(http_context& ctx, httpd::routes& r, gms::gossiper& g);
|
||||
void unset_hinted_handoff(http_context& ctx, httpd::routes& r);
|
||||
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "api/api-doc/storage_proxy.json.hh"
|
||||
#include "api/api-doc/utils.json.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "db/config.hh"
|
||||
#include "utils/histogram.hh"
|
||||
#include "replica/database.hh"
|
||||
@@ -115,17 +116,17 @@ utils_json::estimated_histogram time_to_json_histogram(const utils::time_estimat
|
||||
return res;
|
||||
}
|
||||
|
||||
static future<json::json_return_type> sum_estimated_histogram(sharded<service::storage_proxy>& proxy, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
|
||||
return two_dimensional_map_reduce(proxy, [f] (service::storage_proxy_stats::stats& stats) {
|
||||
static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
|
||||
return two_dimensional_map_reduce(ctx.sp, [f] (service::storage_proxy_stats::stats& stats) {
|
||||
return (stats.*f).histogram();
|
||||
}, utils::time_estimated_histogram_merge, utils::time_estimated_histogram()).then([](const utils::time_estimated_histogram& val) {
|
||||
return make_ready_future<json::json_return_type>(time_to_json_histogram(val));
|
||||
});
|
||||
}
|
||||
|
||||
static future<json::json_return_type> sum_estimated_histogram(sharded<service::storage_proxy>& proxy, utils::estimated_histogram service::storage_proxy_stats::stats::*f) {
|
||||
static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, utils::estimated_histogram service::storage_proxy_stats::stats::*f) {
|
||||
|
||||
return two_dimensional_map_reduce(proxy, f, utils::estimated_histogram_merge,
|
||||
return two_dimensional_map_reduce(ctx.sp, f, utils::estimated_histogram_merge,
|
||||
utils::estimated_histogram()).then([](const utils::estimated_histogram& val) {
|
||||
utils_json::estimated_histogram res;
|
||||
res = val;
|
||||
@@ -133,8 +134,8 @@ static future<json::json_return_type> sum_estimated_histogram(sharded<service::
|
||||
});
|
||||
}
|
||||
|
||||
static future<json::json_return_type> total_latency(sharded<service::storage_proxy>& proxy, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
|
||||
return two_dimensional_map_reduce(proxy, [f] (service::storage_proxy_stats::stats& stats) {
|
||||
static future<json::json_return_type> total_latency(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram service::storage_proxy_stats::stats::*f) {
|
||||
return two_dimensional_map_reduce(ctx.sp, [f] (service::storage_proxy_stats::stats& stats) {
|
||||
return (stats.*f).hist.mean * (stats.*f).hist.count;
|
||||
}, std::plus<double>(), 0.0).then([](double val) {
|
||||
int64_t res = val;
|
||||
@@ -183,43 +184,43 @@ sum_timer_stats_storage_proxy(distributed<proxy>& d,
|
||||
});
|
||||
}
|
||||
|
||||
void set_storage_proxy(http_context& ctx, routes& r, sharded<service::storage_proxy>& proxy) {
|
||||
void set_storage_proxy(http_context& ctx, routes& r, sharded<service::storage_service>& ss) {
|
||||
sp::get_total_hints.set(r, [](std::unique_ptr<http::request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
sp::get_hinted_handoff_enabled.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
const auto& filter = proxy.local().get_hints_host_filter();
|
||||
sp::get_hinted_handoff_enabled.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
const auto& filter = ctx.sp.local().get_hints_host_filter();
|
||||
return make_ready_future<json::json_return_type>(!filter.is_disabled_for_all());
|
||||
});
|
||||
|
||||
sp::set_hinted_handoff_enabled.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
sp::set_hinted_handoff_enabled.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
auto enable = req->get_query_param("enable");
|
||||
auto filter = (enable == "true" || enable == "1")
|
||||
? db::hints::host_filter(db::hints::host_filter::enabled_for_all_tag {})
|
||||
: db::hints::host_filter(db::hints::host_filter::disabled_for_all_tag {});
|
||||
return proxy.invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
|
||||
return ctx.sp.invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
|
||||
return sp.change_hints_host_filter(filter);
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
sp::get_hinted_handoff_enabled_by_dc.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
sp::get_hinted_handoff_enabled_by_dc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
std::vector<sstring> res;
|
||||
const auto& filter = proxy.local().get_hints_host_filter();
|
||||
const auto& filter = ctx.sp.local().get_hints_host_filter();
|
||||
const auto& dcs = filter.get_dcs();
|
||||
res.reserve(res.size());
|
||||
std::copy(dcs.begin(), dcs.end(), std::back_inserter(res));
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
sp::set_hinted_handoff_enabled_by_dc_list.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
sp::set_hinted_handoff_enabled_by_dc_list.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
auto dcs = req->get_query_param("dcs");
|
||||
auto filter = db::hints::host_filter::parse_from_dc_list(std::move(dcs));
|
||||
return proxy.invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
|
||||
return ctx.sp.invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
|
||||
return sp.change_hints_host_filter(filter);
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
@@ -341,131 +342,144 @@ void set_storage_proxy(http_context& ctx, routes& r, sharded<service::storage_pr
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
sp::get_read_repair_attempted.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read_repair_attempts);
|
||||
sp::get_read_repair_attempted.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_attempts);
|
||||
});
|
||||
|
||||
sp::get_read_repair_repaired_blocking.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read_repair_repaired_blocking);
|
||||
sp::get_read_repair_repaired_blocking.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_repaired_blocking);
|
||||
});
|
||||
|
||||
sp::get_read_repair_repaired_background.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read_repair_repaired_background);
|
||||
sp::get_read_repair_repaired_background.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_repaired_background);
|
||||
});
|
||||
|
||||
sp::get_cas_read_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &proxy::stats::cas_read_timeouts);
|
||||
sp::get_schema_versions.set(r, [&ss](std::unique_ptr<http::request> req) {
|
||||
return ss.local().describe_schema_versions().then([] (auto result) {
|
||||
std::vector<sp::mapper_list> res;
|
||||
for (auto e : result) {
|
||||
sp::mapper_list entry;
|
||||
entry.key = std::move(e.first);
|
||||
entry.value = std::move(e.second);
|
||||
res.emplace_back(std::move(entry));
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(std::move(res));
|
||||
});
|
||||
});
|
||||
|
||||
sp::get_cas_read_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &proxy::stats::cas_read_unavailables);
|
||||
sp::get_cas_read_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::cas_read_timeouts);
|
||||
});
|
||||
|
||||
sp::get_cas_write_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &proxy::stats::cas_write_timeouts);
|
||||
sp::get_cas_read_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::cas_read_unavailables);
|
||||
});
|
||||
|
||||
sp::get_cas_write_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &proxy::stats::cas_write_unavailables);
|
||||
sp::get_cas_write_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::cas_write_timeouts);
|
||||
});
|
||||
|
||||
sp::get_cas_write_metrics_unfinished_commit.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_stats(proxy, &proxy::stats::cas_write_unfinished_commit);
|
||||
sp::get_cas_write_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::cas_write_unavailables);
|
||||
});
|
||||
|
||||
sp::get_cas_write_metrics_contention.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_estimated_histogram(proxy, &proxy::stats::cas_write_contention);
|
||||
sp::get_cas_write_metrics_unfinished_commit.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_stats(ctx.sp, &proxy::stats::cas_write_unfinished_commit);
|
||||
});
|
||||
|
||||
sp::get_cas_write_metrics_condition_not_met.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_stats(proxy, &proxy::stats::cas_write_condition_not_met);
|
||||
sp::get_cas_write_metrics_contention.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_estimated_histogram(ctx, &proxy::stats::cas_write_contention);
|
||||
});
|
||||
|
||||
sp::get_cas_write_metrics_failed_read_round_optimization.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_stats(proxy, &proxy::stats::cas_failed_read_round_optimization);
|
||||
sp::get_cas_write_metrics_condition_not_met.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_stats(ctx.sp, &proxy::stats::cas_write_condition_not_met);
|
||||
});
|
||||
|
||||
sp::get_cas_read_metrics_unfinished_commit.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_stats(proxy, &proxy::stats::cas_read_unfinished_commit);
|
||||
sp::get_cas_write_metrics_failed_read_round_optimization.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_stats(ctx.sp, &proxy::stats::cas_failed_read_round_optimization);
|
||||
});
|
||||
|
||||
sp::get_cas_read_metrics_contention.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_estimated_histogram(proxy, &proxy::stats::cas_read_contention);
|
||||
sp::get_cas_read_metrics_unfinished_commit.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_stats(ctx.sp, &proxy::stats::cas_read_unfinished_commit);
|
||||
});
|
||||
|
||||
sp::get_read_metrics_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::read_timeouts);
|
||||
sp::get_cas_read_metrics_contention.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_estimated_histogram(ctx, &proxy::stats::cas_read_contention);
|
||||
});
|
||||
|
||||
sp::get_read_metrics_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::read_unavailables);
|
||||
sp::get_read_metrics_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::read_timeouts);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::range_slice_timeouts);
|
||||
sp::get_read_metrics_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::read_unavailables);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::range_slice_unavailables);
|
||||
sp::get_range_metrics_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::range_slice_timeouts);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_timeouts.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::write_timeouts);
|
||||
sp::get_range_metrics_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::range_slice_unavailables);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_unavailables.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(proxy, &service::storage_proxy_stats::stats::write_unavailables);
|
||||
sp::get_write_metrics_timeouts.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::write_timeouts);
|
||||
});
|
||||
|
||||
sp::get_read_metrics_timeouts_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::read_timeouts);
|
||||
sp::get_write_metrics_unavailables.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::write_unavailables);
|
||||
});
|
||||
|
||||
sp::get_read_metrics_unavailables_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::read_unavailables);
|
||||
sp::get_read_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::read_timeouts);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_timeouts_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::range_slice_timeouts);
|
||||
sp::get_read_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::read_unavailables);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_unavailables_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::range_slice_unavailables);
|
||||
sp::get_range_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::range_slice_timeouts);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_timeouts_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::write_timeouts);
|
||||
sp::get_range_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::range_slice_unavailables);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_unavailables_rates.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(proxy, &service::storage_proxy_stats::stats::write_unavailables);
|
||||
sp::get_write_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::write_timeouts);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_latency_histogram_depricated.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_histogram_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::range);
|
||||
sp::get_write_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::write_unavailables);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_latency_histogram_depricated.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_histogram_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::write);
|
||||
sp::get_range_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
|
||||
});
|
||||
|
||||
sp::get_read_metrics_latency_histogram_depricated.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_histogram_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read);
|
||||
sp::get_write_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::write);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::range);
|
||||
sp::get_read_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::write);
|
||||
});
|
||||
sp::get_cas_write_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats(proxy, &proxy::stats::cas_write);
|
||||
sp::get_range_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
|
||||
});
|
||||
|
||||
sp::get_cas_read_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats(proxy, &proxy::stats::cas_read);
|
||||
sp::get_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::write);
|
||||
});
|
||||
sp::get_cas_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats(ctx.sp, &proxy::stats::cas_write);
|
||||
});
|
||||
|
||||
sp::get_cas_read_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats(ctx.sp, &proxy::stats::cas_read);
|
||||
});
|
||||
|
||||
sp::get_view_write_metrics_latency_histogram.set(r, [](std::unique_ptr<http::request> req) {
|
||||
@@ -476,31 +490,31 @@ void set_storage_proxy(http_context& ctx, routes& r, sharded<service::storage_pr
|
||||
return make_ready_future<json::json_return_type>(get_empty_moving_average());
|
||||
});
|
||||
|
||||
sp::get_read_metrics_latency_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::read);
|
||||
sp::get_read_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read);
|
||||
});
|
||||
|
||||
sp::get_read_estimated_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_estimated_histogram(proxy, &service::storage_proxy_stats::stats::read);
|
||||
sp::get_read_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::read);
|
||||
});
|
||||
|
||||
sp::get_read_latency.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return total_latency(proxy, &service::storage_proxy_stats::stats::read);
|
||||
sp::get_read_latency.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return total_latency(ctx, &service::storage_proxy_stats::stats::read);
|
||||
});
|
||||
sp::get_write_estimated_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_estimated_histogram(proxy, &service::storage_proxy_stats::stats::write);
|
||||
sp::get_write_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::write);
|
||||
});
|
||||
|
||||
sp::get_write_latency.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return total_latency(proxy, &service::storage_proxy_stats::stats::write);
|
||||
sp::get_write_latency.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return total_latency(ctx, &service::storage_proxy_stats::stats::write);
|
||||
});
|
||||
|
||||
sp::get_range_estimated_histogram.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats_storage_proxy(proxy, &service::storage_proxy_stats::stats::range);
|
||||
sp::get_range_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
|
||||
});
|
||||
|
||||
sp::get_range_latency.set(r, [&proxy](std::unique_ptr<http::request> req) {
|
||||
return total_latency(proxy, &service::storage_proxy_stats::stats::range);
|
||||
sp::get_range_latency.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return total_latency(ctx, &service::storage_proxy_stats::stats::range);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -533,6 +547,7 @@ void unset_storage_proxy(http_context& ctx, routes& r) {
|
||||
sp::get_read_repair_attempted.unset(r);
|
||||
sp::get_read_repair_repaired_blocking.unset(r);
|
||||
sp::get_read_repair_repaired_background.unset(r);
|
||||
sp::get_schema_versions.unset(r);
|
||||
sp::get_cas_read_timeouts.unset(r);
|
||||
sp::get_cas_read_unavailables.unset(r);
|
||||
sp::get_cas_write_timeouts.unset(r);
|
||||
|
||||
@@ -11,11 +11,11 @@
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include "api.hh"
|
||||
|
||||
namespace service { class storage_proxy; }
|
||||
namespace service { class storage_service; }
|
||||
|
||||
namespace api {
|
||||
|
||||
void set_storage_proxy(http_context& ctx, httpd::routes& r, sharded<service::storage_proxy>& proxy);
|
||||
void set_storage_proxy(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss);
|
||||
void unset_storage_proxy(http_context& ctx, httpd::routes& r);
|
||||
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
|
||||
#include "storage_service.hh"
|
||||
#include "api/api-doc/storage_service.json.hh"
|
||||
#include "api/api-doc/storage_proxy.json.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
#include "utils/hash.hh"
|
||||
@@ -43,6 +42,7 @@
|
||||
#include "thrift/controller.hh"
|
||||
#include "locator/token_metadata.hh"
|
||||
#include "cdc/generation_service.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "locator/abstract_replication_strategy.hh"
|
||||
#include "sstables_loader.hh"
|
||||
#include "db/view/view_builder.hh"
|
||||
@@ -52,10 +52,22 @@ using namespace std::chrono_literals;
|
||||
|
||||
extern logging::logger apilog;
|
||||
|
||||
// NOTE(review): this overload is declared inside namespace std, presumably so
// that formatting code living in std/fmt can find it by unqualified lookup when
// printing containers of api::table_info — confirm. Adding overloads to
// namespace std is generally not sanctioned by the C++ standard.
namespace std {
|
||||
|
||||
// Writes `ti` to `os` in the form table{name=<name>, id=<id>} and returns `os`
// so that insertions can be chained.
std::ostream& operator<<(std::ostream& os, const api::table_info& ti) {
|
||||
// The doubled braces in the format string are fmt escapes for literal '{' and '}'.
fmt::print(os, "table{{name={}, id={}}}", ti.name, ti.id);
|
||||
return os;
|
||||
}
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace api {
|
||||
|
||||
// Returns a reference to the token metadata currently published by this
// shard's shared_token_metadata instance.
// NOTE(review): the pointer obtained from get() is a temporary; the returned
// reference presumably stays valid only while shared_token_metadata keeps the
// same object alive — confirm before holding it across a preemption point.
const locator::token_metadata& http_context::get_token_metadata() {
    auto& local_stm = shared_token_metadata.local();
    return *local_stm.get();
}
|
||||
|
||||
namespace ss = httpd::storage_service_json;
|
||||
namespace sp = httpd::storage_proxy_json;
|
||||
using namespace json;
|
||||
|
||||
sstring validate_keyspace(http_context& ctx, sstring ks_name) {
|
||||
@@ -317,7 +329,7 @@ void set_repair(http_context& ctx, routes& r, sharded<repair_service>& repair) {
|
||||
ss::repair_async.set(r, [&ctx, &repair](std::unique_ptr<http::request> req) {
|
||||
static std::vector<sstring> options = {"primaryRange", "parallelism", "incremental",
|
||||
"jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "ignore_nodes", "trace",
|
||||
"startToken", "endToken", "ranges_parallelism"};
|
||||
"startToken", "endToken" };
|
||||
std::unordered_map<sstring, sstring> options_map;
|
||||
for (auto o : options) {
|
||||
auto s = req->get_query_param(o);
|
||||
@@ -462,21 +474,29 @@ static future<json::json_return_type> describe_ring_as_json(sharded<service::sto
|
||||
co_return json::json_return_type(stream_range_as_array(co_await ss.local().describe_ring(keyspace), token_range_endpoints_to_json));
|
||||
}
|
||||
|
||||
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
|
||||
ss::local_hostid.set(r, [&ss](std::unique_ptr<http::request> req) {
|
||||
auto id = ss.local().get_token_metadata().get_my_id();
|
||||
static std::vector<table_id> get_table_ids(const std::vector<table_info>& table_infos) {
|
||||
std::vector<table_id> table_ids{table_infos.size()};
|
||||
boost::transform(table_infos, table_ids.begin(), [] (const auto& ti) {
|
||||
return ti.id;
|
||||
});
|
||||
return table_ids;
|
||||
}
|
||||
|
||||
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, gms::gossiper& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ks) {
|
||||
ss::local_hostid.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
auto id = ctx.db.local().get_config().host_id;
|
||||
return make_ready_future<json::json_return_type>(id.to_sstring());
|
||||
});
|
||||
|
||||
ss::get_tokens.set(r, [&ss] (std::unique_ptr<http::request> req) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ss.local().get_token_metadata().sorted_tokens(), [](const dht::token& i) {
|
||||
ss::get_tokens.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().sorted_tokens(), [](const dht::token& i) {
|
||||
return fmt::to_string(i);
|
||||
}));
|
||||
});
|
||||
|
||||
ss::get_node_tokens.set(r, [&ss] (std::unique_ptr<http::request> req) {
|
||||
ss::get_node_tokens.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
gms::inet_address addr(req->param["endpoint"]);
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ss.local().get_token_metadata().get_tokens(addr), [](const dht::token& i) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().get_tokens(addr), [](const dht::token& i) {
|
||||
return fmt::to_string(i);
|
||||
}));
|
||||
});
|
||||
@@ -544,8 +564,8 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
});
|
||||
|
||||
ss::get_leaving_nodes.set(r, [&ss](const_req req) {
|
||||
return container_to_vec(ss.local().get_token_metadata().get_leaving_endpoints());
|
||||
ss::get_leaving_nodes.set(r, [&ctx](const_req req) {
|
||||
return container_to_vec(ctx.get_token_metadata().get_leaving_endpoints());
|
||||
});
|
||||
|
||||
ss::get_moving_nodes.set(r, [](const_req req) {
|
||||
@@ -553,8 +573,8 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
return container_to_vec(addr);
|
||||
});
|
||||
|
||||
ss::get_joining_nodes.set(r, [&ss](const_req req) {
|
||||
auto points = ss.local().get_token_metadata().get_bootstrap_tokens();
|
||||
ss::get_joining_nodes.set(r, [&ctx](const_req req) {
|
||||
auto points = ctx.get_token_metadata().get_bootstrap_tokens();
|
||||
std::unordered_set<sstring> addr;
|
||||
for (auto i: points) {
|
||||
addr.insert(fmt::to_string(i.second));
|
||||
@@ -626,9 +646,9 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
return describe_ring_as_json(ss, validate_keyspace(ctx, req->param));
|
||||
});
|
||||
|
||||
ss::get_host_id_map.set(r, [&ss](const_req req) {
|
||||
ss::get_host_id_map.set(r, [&ctx](const_req req) {
|
||||
std::vector<ss::mapper> res;
|
||||
return map_to_key_value(ss.local().get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
|
||||
return map_to_key_value(ctx.get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
|
||||
});
|
||||
|
||||
ss::get_load.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
@@ -648,9 +668,9 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
});
|
||||
|
||||
ss::get_current_generation_number.set(r, [&ss](std::unique_ptr<http::request> req) {
|
||||
ss::get_current_generation_number.set(r, [&g](std::unique_ptr<http::request> req) {
|
||||
gms::inet_address ep(utils::fb_utilities::get_broadcast_address());
|
||||
return ss.local().gossiper().get_current_generation_number(ep).then([](gms::generation_type res) {
|
||||
return g.get_current_generation_number(ep).then([](gms::generation_type res) {
|
||||
return make_ready_future<json::json_return_type>(res.value());
|
||||
});
|
||||
});
|
||||
@@ -661,10 +681,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
req.get_query_param("key")));
|
||||
});
|
||||
|
||||
ss::cdc_streams_check_and_repair.set(r, [&ss] (std::unique_ptr<http::request> req) {
|
||||
return ss.invoke_on(0, [] (service::storage_service& ss) {
|
||||
return ss.check_and_repair_cdc_streams();
|
||||
}).then([] {
|
||||
ss::cdc_streams_check_and_repair.set(r, [&cdc_gs] (std::unique_ptr<http::request> req) {
|
||||
if (!cdc_gs.local_is_initialized()) {
|
||||
throw std::runtime_error("get_cdc_generation_service: not initialized yet");
|
||||
}
|
||||
return cdc_gs.local().check_and_repair_cdc_streams().then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
@@ -676,7 +697,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
apilog.debug("force_keyspace_compaction: keyspace={} tables={}", keyspace, table_infos);
|
||||
|
||||
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
||||
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), db, table_infos);
|
||||
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), db, get_table_ids(table_infos));
|
||||
try {
|
||||
co_await task->done();
|
||||
} catch (...) {
|
||||
@@ -699,7 +720,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
}
|
||||
|
||||
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
||||
auto task = co_await compaction_module.make_and_start_task<cleanup_keyspace_compaction_task_impl>({}, std::move(keyspace), db, table_infos);
|
||||
auto task = co_await compaction_module.make_and_start_task<cleanup_keyspace_compaction_task_impl>({}, std::move(keyspace), db, get_table_ids(table_infos));
|
||||
try {
|
||||
co_await task->done();
|
||||
} catch (...) {
|
||||
@@ -714,7 +735,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
apilog.info("perform_keyspace_offstrategy_compaction: keyspace={} tables={}", keyspace, table_infos);
|
||||
bool res = false;
|
||||
auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
|
||||
auto task = co_await compaction_module.make_and_start_task<offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, table_infos, res);
|
||||
auto task = co_await compaction_module.make_and_start_task<offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, get_table_ids(table_infos), res);
|
||||
try {
|
||||
co_await task->done();
|
||||
} catch (...) {
|
||||
@@ -732,7 +753,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
apilog.info("upgrade_sstables: keyspace={} tables={} exclude_current_version={}", keyspace, table_infos, exclude_current_version);
|
||||
|
||||
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
||||
auto task = co_await compaction_module.make_and_start_task<upgrade_sstables_compaction_task_impl>({}, std::move(keyspace), db, table_infos, exclude_current_version);
|
||||
auto task = co_await compaction_module.make_and_start_task<upgrade_sstables_compaction_task_impl>({}, std::move(keyspace), db, get_table_ids(table_infos), exclude_current_version);
|
||||
try {
|
||||
co_await task->done();
|
||||
} catch (...) {
|
||||
@@ -773,16 +794,21 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
|
||||
ss::remove_node.set(r, [&ss](std::unique_ptr<http::request> req) {
|
||||
auto host_id = validate_host_id(req->get_query_param("host_id"));
|
||||
std::vector<sstring> ignore_nodes_strs = utils::split_comma_separated_list(req->get_query_param("ignore_nodes"));
|
||||
std::vector<sstring> ignore_nodes_strs= split(req->get_query_param("ignore_nodes"), ",");
|
||||
apilog.info("remove_node: host_id={} ignore_nodes={}", host_id, ignore_nodes_strs);
|
||||
auto ignore_nodes = std::list<locator::host_id_or_endpoint>();
|
||||
for (const sstring& n : ignore_nodes_strs) {
|
||||
for (std::string n : ignore_nodes_strs) {
|
||||
try {
|
||||
auto hoep = locator::host_id_or_endpoint(n);
|
||||
if (!ignore_nodes.empty() && hoep.has_host_id() != ignore_nodes.front().has_host_id()) {
|
||||
throw std::runtime_error("All nodes should be identified using the same method: either Host IDs or ip addresses.");
|
||||
std::replace(n.begin(), n.end(), '\"', ' ');
|
||||
std::replace(n.begin(), n.end(), '\'', ' ');
|
||||
boost::trim_all(n);
|
||||
if (!n.empty()) {
|
||||
auto hoep = locator::host_id_or_endpoint(n);
|
||||
if (!ignore_nodes.empty() && hoep.has_host_id() != ignore_nodes.front().has_host_id()) {
|
||||
throw std::runtime_error("All nodes should be identified using the same method: either Host IDs or ip addresses.");
|
||||
}
|
||||
ignore_nodes.push_back(std::move(hoep));
|
||||
}
|
||||
ignore_nodes.push_back(std::move(hoep));
|
||||
} catch (...) {
|
||||
throw std::runtime_error(format("Failed to parse ignore_nodes parameter: ignore_nodes={}, node={}: {}", ignore_nodes_strs, n, std::current_exception()));
|
||||
}
|
||||
@@ -895,11 +921,11 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
ss::is_initialized.set(r, [&ss](std::unique_ptr<http::request> req) {
|
||||
return ss.local().get_operation_mode().then([&ss] (auto mode) {
|
||||
ss::is_initialized.set(r, [&ss, &g](std::unique_ptr<http::request> req) {
|
||||
return ss.local().get_operation_mode().then([&g] (auto mode) {
|
||||
bool is_initialized = mode >= service::storage_service::mode::STARTING;
|
||||
if (mode == service::storage_service::mode::NORMAL) {
|
||||
is_initialized = ss.local().gossiper().is_enabled();
|
||||
is_initialized = g.is_enabled();
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(is_initialized);
|
||||
});
|
||||
@@ -968,9 +994,10 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
ks.set_incremental_backups(value);
|
||||
}
|
||||
|
||||
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) {
|
||||
table->set_incremental_backups(value);
|
||||
});
|
||||
for (auto& pair: db.get_column_families()) {
|
||||
auto cf_ptr = pair.second;
|
||||
cf_ptr->set_incremental_backups(value);
|
||||
}
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
@@ -1011,11 +1038,13 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
ss::reset_local_schema.set(r, [&ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
ss::reset_local_schema.set(r, [&ctx, &sys_ks](std::unique_ptr<http::request> req) {
|
||||
// FIXME: We should truncate schema tables if more than one node in the cluster.
|
||||
auto& fs = ctx.sp.local().features();
|
||||
apilog.info("reset_local_schema");
|
||||
co_await ss.local().reload_schema();
|
||||
co_return json_void();
|
||||
return db::schema_tables::recalculate_schema_version(sys_ks, ctx.sp, fs).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
ss::set_trace_probability.set(r, [](std::unique_ptr<http::request> req) {
|
||||
@@ -1120,12 +1149,12 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
ss::get_cluster_name.set(r, [&ss](const_req req) {
|
||||
return ss.local().gossiper().get_cluster_name();
|
||||
ss::get_cluster_name.set(r, [&g](const_req req) {
|
||||
return g.get_cluster_name();
|
||||
});
|
||||
|
||||
ss::get_partitioner_name.set(r, [&ss](const_req req) {
|
||||
return ss.local().gossiper().get_partitioner_name();
|
||||
ss::get_partitioner_name.set(r, [&g](const_req req) {
|
||||
return g.get_partitioner_name();
|
||||
});
|
||||
|
||||
ss::get_tombstone_warn_threshold.set(r, [](std::unique_ptr<http::request> req) {
|
||||
@@ -1243,7 +1272,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
|
||||
auto& ext = db.get_config().extensions();
|
||||
|
||||
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> t) {
|
||||
for (auto& t : db.get_column_families() | boost::adaptors::map_values) {
|
||||
auto& schema = t->schema();
|
||||
if ((ks.empty() || ks == schema->ks_name()) && (cf.empty() || cf == schema->cf_name())) {
|
||||
// at most Nsstables long
|
||||
@@ -1324,7 +1353,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
}
|
||||
res.emplace_back(std::move(tst));
|
||||
}
|
||||
});
|
||||
}
|
||||
std::sort(res.begin(), res.end(), [](const ss::table_sstables& t1, const ss::table_sstables& t2) {
|
||||
return t1.keyspace() < t2.keyspace() || (t1.keyspace() == t2.keyspace() && t1.table() < t2.table());
|
||||
});
|
||||
@@ -1334,123 +1363,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
ss::reload_raft_topology_state.set(r,
|
||||
[&ss, &group0_client] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
co_await ss.invoke_on(0, [&group0_client] (service::storage_service& ss) -> future<> {
|
||||
apilog.info("Waiting for group 0 read/apply mutex before reloading Raft topology state...");
|
||||
auto holder = co_await group0_client.hold_read_apply_mutex();
|
||||
apilog.info("Reloading Raft topology state");
|
||||
// Using topology_transition() instead of topology_state_load(), because the former notifies listeners
|
||||
co_await ss.topology_transition();
|
||||
apilog.info("Reloaded Raft topology state");
|
||||
});
|
||||
co_return json_void();
|
||||
});
|
||||
|
||||
sp::get_schema_versions.set(r, [&ss](std::unique_ptr<http::request> req) {
|
||||
return ss.local().describe_schema_versions().then([] (auto result) {
|
||||
std::vector<sp::mapper_list> res;
|
||||
for (auto e : result) {
|
||||
sp::mapper_list entry;
|
||||
entry.key = std::move(e.first);
|
||||
entry.value = std::move(e.second);
|
||||
res.emplace_back(std::move(entry));
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(std::move(res));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void unset_storage_service(http_context& ctx, routes& r) {
|
||||
ss::local_hostid.unset(r);
|
||||
ss::get_tokens.unset(r);
|
||||
ss::get_node_tokens.unset(r);
|
||||
ss::get_commitlog.unset(r);
|
||||
ss::get_token_endpoint.unset(r);
|
||||
ss::toppartitions_generic.unset(r);
|
||||
ss::get_leaving_nodes.unset(r);
|
||||
ss::get_moving_nodes.unset(r);
|
||||
ss::get_joining_nodes.unset(r);
|
||||
ss::get_release_version.unset(r);
|
||||
ss::get_scylla_release_version.unset(r);
|
||||
ss::get_schema_version.unset(r);
|
||||
ss::get_all_data_file_locations.unset(r);
|
||||
ss::get_saved_caches_location.unset(r);
|
||||
ss::get_range_to_endpoint_map.unset(r);
|
||||
ss::get_pending_range_to_endpoint_map.unset(r);
|
||||
ss::describe_any_ring.unset(r);
|
||||
ss::describe_ring.unset(r);
|
||||
ss::get_host_id_map.unset(r);
|
||||
ss::get_load.unset(r);
|
||||
ss::get_load_map.unset(r);
|
||||
ss::get_current_generation_number.unset(r);
|
||||
ss::get_natural_endpoints.unset(r);
|
||||
ss::cdc_streams_check_and_repair.unset(r);
|
||||
ss::force_keyspace_compaction.unset(r);
|
||||
ss::force_keyspace_cleanup.unset(r);
|
||||
ss::perform_keyspace_offstrategy_compaction.unset(r);
|
||||
ss::upgrade_sstables.unset(r);
|
||||
ss::force_keyspace_flush.unset(r);
|
||||
ss::decommission.unset(r);
|
||||
ss::move.unset(r);
|
||||
ss::remove_node.unset(r);
|
||||
ss::get_removal_status.unset(r);
|
||||
ss::force_remove_completion.unset(r);
|
||||
ss::set_logging_level.unset(r);
|
||||
ss::get_logging_levels.unset(r);
|
||||
ss::get_operation_mode.unset(r);
|
||||
ss::is_starting.unset(r);
|
||||
ss::get_drain_progress.unset(r);
|
||||
ss::drain.unset(r);
|
||||
ss::truncate.unset(r);
|
||||
ss::get_keyspaces.unset(r);
|
||||
ss::stop_gossiping.unset(r);
|
||||
ss::start_gossiping.unset(r);
|
||||
ss::is_gossip_running.unset(r);
|
||||
ss::stop_daemon.unset(r);
|
||||
ss::is_initialized.unset(r);
|
||||
ss::join_ring.unset(r);
|
||||
ss::is_joined.unset(r);
|
||||
ss::set_stream_throughput_mb_per_sec.unset(r);
|
||||
ss::get_stream_throughput_mb_per_sec.unset(r);
|
||||
ss::get_compaction_throughput_mb_per_sec.unset(r);
|
||||
ss::set_compaction_throughput_mb_per_sec.unset(r);
|
||||
ss::is_incremental_backups_enabled.unset(r);
|
||||
ss::set_incremental_backups_enabled.unset(r);
|
||||
ss::rebuild.unset(r);
|
||||
ss::bulk_load.unset(r);
|
||||
ss::bulk_load_async.unset(r);
|
||||
ss::reschedule_failed_deletions.unset(r);
|
||||
ss::sample_key_range.unset(r);
|
||||
ss::reset_local_schema.unset(r);
|
||||
ss::set_trace_probability.unset(r);
|
||||
ss::get_trace_probability.unset(r);
|
||||
ss::get_slow_query_info.unset(r);
|
||||
ss::set_slow_query.unset(r);
|
||||
ss::enable_auto_compaction.unset(r);
|
||||
ss::disable_auto_compaction.unset(r);
|
||||
ss::enable_tombstone_gc.unset(r);
|
||||
ss::disable_tombstone_gc.unset(r);
|
||||
ss::deliver_hints.unset(r);
|
||||
ss::get_cluster_name.unset(r);
|
||||
ss::get_partitioner_name.unset(r);
|
||||
ss::get_tombstone_warn_threshold.unset(r);
|
||||
ss::set_tombstone_warn_threshold.unset(r);
|
||||
ss::get_tombstone_failure_threshold.unset(r);
|
||||
ss::set_tombstone_failure_threshold.unset(r);
|
||||
ss::get_batch_size_failure_threshold.unset(r);
|
||||
ss::set_batch_size_failure_threshold.unset(r);
|
||||
ss::set_hinted_handoff_throttle_in_kb.unset(r);
|
||||
ss::get_metrics_load.unset(r);
|
||||
ss::get_exceptions.unset(r);
|
||||
ss::get_total_hints_in_progress.unset(r);
|
||||
ss::get_total_hints.unset(r);
|
||||
ss::get_ownership.unset(r);
|
||||
ss::get_effective_ownership.unset(r);
|
||||
ss::sstable_info.unset(r);
|
||||
ss::reload_raft_topology_state.unset(r);
|
||||
sp::get_schema_versions.unset(r);
|
||||
}
|
||||
|
||||
enum class scrub_status {
|
||||
|
||||
@@ -25,6 +25,7 @@ class system_keyspace;
|
||||
}
|
||||
namespace netw { class messaging_service; }
|
||||
class repair_service;
|
||||
namespace cdc { class generation_service; }
|
||||
class sstables_loader;
|
||||
|
||||
namespace gms {
|
||||
@@ -50,6 +51,11 @@ sstring validate_keyspace(http_context& ctx, const httpd::parameters& param);
|
||||
// If the parameter is found and empty, returns a list of all table names in the keyspace.
|
||||
std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name);
|
||||
|
||||
struct table_info {
|
||||
sstring name;
|
||||
table_id id;
|
||||
};
|
||||
|
||||
// splits a request parameter assumed to hold a comma-separated list of table names
|
||||
// verify that the tables are found, otherwise a bad_param_exception exception is thrown
|
||||
// containing the description of the respective no_such_column_family error.
|
||||
@@ -57,8 +63,7 @@ std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, con
|
||||
// if the parameter is not found or is empty, returns a list of all table infos in the keyspace.
|
||||
std::vector<table_info> parse_table_infos(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name);
|
||||
|
||||
void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss, service::raft_group0_client&);
|
||||
void unset_storage_service(http_context& ctx, httpd::routes& r);
|
||||
void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss, gms::gossiper& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ls);
|
||||
void set_sstables_loader(http_context& ctx, httpd::routes& r, sharded<sstables_loader>& sst_loader);
|
||||
void unset_sstables_loader(http_context& ctx, httpd::routes& r);
|
||||
void set_view_builder(http_context& ctx, httpd::routes& r, sharded<db::view::view_builder>& vb);
|
||||
@@ -74,3 +79,9 @@ void unset_snapshot(http_context& ctx, httpd::routes& r);
|
||||
seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, http_context &ctx, bool legacy_request = false);
|
||||
|
||||
} // namespace api
|
||||
|
||||
namespace std {
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const api::table_info& ti);
|
||||
|
||||
} // namespace std
|
||||
|
||||
@@ -7,18 +7,10 @@
|
||||
*/
|
||||
|
||||
#include "api/api-doc/system.json.hh"
|
||||
#include "api/api-doc/metrics.json.hh"
|
||||
|
||||
#include "api/api.hh"
|
||||
|
||||
#include <seastar/core/reactor.hh>
|
||||
#include <seastar/core/metrics_api.hh>
|
||||
#include <seastar/core/relabel_config.hh>
|
||||
#include <seastar/http/exception.hh>
|
||||
#include <seastar/util/short_streams.hh>
|
||||
#include <seastar/http/short_streams.hh>
|
||||
#include "utils/rjson.hh"
|
||||
|
||||
#include "log.hh"
|
||||
#include "replica/database.hh"
|
||||
|
||||
@@ -28,77 +20,8 @@ namespace api {
|
||||
using namespace seastar::httpd;
|
||||
|
||||
namespace hs = httpd::system_json;
|
||||
namespace hm = httpd::metrics_json;
|
||||
|
||||
void set_system(http_context& ctx, routes& r) {
|
||||
hm::get_metrics_config.set(r, [](const_req req) {
|
||||
std::vector<hm::metrics_config> res;
|
||||
res.resize(seastar::metrics::get_relabel_configs().size());
|
||||
size_t i = 0;
|
||||
for (auto&& r : seastar::metrics::get_relabel_configs()) {
|
||||
res[i].action = r.action;
|
||||
res[i].target_label = r.target_label;
|
||||
res[i].replacement = r.replacement;
|
||||
res[i].separator = r.separator;
|
||||
res[i].source_labels = r.source_labels;
|
||||
res[i].regex = r.expr.str();
|
||||
i++;
|
||||
}
|
||||
return res;
|
||||
});
|
||||
|
||||
hm::set_metrics_config.set(r, [](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
rapidjson::Document doc;
|
||||
doc.Parse(req->content.c_str());
|
||||
if (!doc.IsArray()) {
|
||||
throw bad_param_exception("Expected a json array");
|
||||
}
|
||||
std::vector<seastar::metrics::relabel_config> relabels;
|
||||
relabels.resize(doc.Size());
|
||||
for (rapidjson::SizeType i = 0; i < doc.Size(); i++) {
|
||||
const auto& element = doc[i];
|
||||
if (element.HasMember("source_labels")) {
|
||||
std::vector<std::string> source_labels;
|
||||
source_labels.resize(element["source_labels"].Size());
|
||||
|
||||
for (size_t j = 0; j < element["source_labels"].Size(); j++) {
|
||||
source_labels[j] = element["source_labels"][j].GetString();
|
||||
}
|
||||
relabels[i].source_labels = source_labels;
|
||||
}
|
||||
if (element.HasMember("action")) {
|
||||
relabels[i].action = seastar::metrics::relabel_config_action(element["action"].GetString());
|
||||
}
|
||||
if (element.HasMember("replacement")) {
|
||||
relabels[i].replacement = element["replacement"].GetString();
|
||||
}
|
||||
if (element.HasMember("separator")) {
|
||||
relabels[i].separator = element["separator"].GetString();
|
||||
}
|
||||
if (element.HasMember("target_label")) {
|
||||
relabels[i].target_label = element["target_label"].GetString();
|
||||
}
|
||||
if (element.HasMember("regex")) {
|
||||
relabels[i].expr = element["regex"].GetString();
|
||||
}
|
||||
}
|
||||
return do_with(std::move(relabels), false, [](const std::vector<seastar::metrics::relabel_config>& relabels, bool& failed) {
|
||||
return smp::invoke_on_all([&relabels, &failed] {
|
||||
return metrics::set_relabel_configs(relabels).then([&failed](const metrics::metric_relabeling_result& result) {
|
||||
if (result.metrics_relabeled_due_to_collision > 0) {
|
||||
failed = true;
|
||||
}
|
||||
return;
|
||||
});
|
||||
}).then([&failed](){
|
||||
if (failed) {
|
||||
throw bad_param_exception("conflicts found during relabeling");
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(seastar::json::json_void());
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
hs::get_system_uptime.set(r, [](const_req req) {
|
||||
return std::chrono::duration_cast<std::chrono::milliseconds>(engine().uptime()).count();
|
||||
});
|
||||
|
||||
@@ -44,7 +44,6 @@ struct task_stats {
|
||||
: task_id(task->id().to_sstring())
|
||||
, state(task->get_status().state)
|
||||
, type(task->type())
|
||||
, scope(task->get_status().scope)
|
||||
, keyspace(task->get_status().keyspace)
|
||||
, table(task->get_status().table)
|
||||
, entity(task->get_status().entity)
|
||||
@@ -54,7 +53,6 @@ struct task_stats {
|
||||
sstring task_id;
|
||||
tasks::task_manager::task_state state;
|
||||
std::string type;
|
||||
std::string scope;
|
||||
std::string keyspace;
|
||||
std::string table;
|
||||
std::string entity;
|
||||
@@ -71,7 +69,6 @@ tm::task_status make_status(full_task_status status) {
|
||||
tm::task_status res{};
|
||||
res.id = status.task_status.id.to_sstring();
|
||||
res.type = status.type;
|
||||
res.scope = status.task_status.scope;
|
||||
res.state = status.task_status.state;
|
||||
res.is_abortable = bool(status.abortable);
|
||||
res.start_time = st;
|
||||
@@ -111,23 +108,18 @@ future<full_task_status> retrieve_status(const tasks::task_manager::foreign_task
|
||||
co_return s;
|
||||
}
|
||||
|
||||
void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>& tm, db::config& cfg) {
|
||||
tm::get_modules.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
std::vector<std::string> v = boost::copy_range<std::vector<std::string>>(tm.local().get_modules() | boost::adaptors::map_keys);
|
||||
void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
||||
tm::get_modules.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
std::vector<std::string> v = boost::copy_range<std::vector<std::string>>(ctx.tm.local().get_modules() | boost::adaptors::map_keys);
|
||||
co_return v;
|
||||
});
|
||||
|
||||
tm::get_tasks.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
tm::get_tasks.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
using chunked_stats = utils::chunked_vector<task_stats>;
|
||||
auto internal = tasks::is_internal{req_param<bool>(*req, "internal", false)};
|
||||
std::vector<chunked_stats> res = co_await tm.map([&req, internal] (tasks::task_manager& tm) {
|
||||
std::vector<chunked_stats> res = co_await ctx.tm.map([&req, internal] (tasks::task_manager& tm) {
|
||||
chunked_stats local_res;
|
||||
tasks::task_manager::module_ptr module;
|
||||
try {
|
||||
module = tm.find_module(req->param["module"]);
|
||||
} catch (...) {
|
||||
throw bad_param_exception(fmt::format("{}", std::current_exception()));
|
||||
}
|
||||
auto module = tm.find_module(req->param["module"]);
|
||||
const auto& filtered_tasks = module->get_tasks() | boost::adaptors::filtered([¶ms = req->query_parameters, internal] (const auto& task) {
|
||||
return (internal || !task.second->is_internal()) && filter_tasks(task.second, params);
|
||||
});
|
||||
@@ -156,76 +148,57 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
|
||||
co_return std::move(f);
|
||||
});
|
||||
|
||||
tm::get_task_status.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
tm::get_task_status.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
tasks::task_manager::foreign_task_ptr task;
|
||||
try {
|
||||
task = co_await tasks::task_manager::invoke_on_task(tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
|
||||
if (task->is_complete()) {
|
||||
task->unregister_task();
|
||||
}
|
||||
co_return std::move(task);
|
||||
}));
|
||||
} catch (tasks::task_manager::task_not_found& e) {
|
||||
throw bad_param_exception(e.what());
|
||||
}
|
||||
auto task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
|
||||
auto state = task->get_status().state;
|
||||
if (state == tasks::task_manager::task_state::done || state == tasks::task_manager::task_state::failed) {
|
||||
task->unregister_task();
|
||||
}
|
||||
co_return std::move(task);
|
||||
}));
|
||||
auto s = co_await retrieve_status(task);
|
||||
co_return make_status(s);
|
||||
});
|
||||
|
||||
tm::abort_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
tm::abort_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
try {
|
||||
co_await tasks::task_manager::invoke_on_task(tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
|
||||
if (!task->is_abortable()) {
|
||||
co_await coroutine::return_exception(std::runtime_error("Requested task cannot be aborted"));
|
||||
}
|
||||
co_await task->abort();
|
||||
});
|
||||
} catch (tasks::task_manager::task_not_found& e) {
|
||||
throw bad_param_exception(e.what());
|
||||
}
|
||||
co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
|
||||
if (!task->is_abortable()) {
|
||||
co_await coroutine::return_exception(std::runtime_error("Requested task cannot be aborted"));
|
||||
}
|
||||
co_await task->abort();
|
||||
});
|
||||
co_return json_void();
|
||||
});
|
||||
|
||||
tm::wait_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
tm::wait_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
tasks::task_manager::foreign_task_ptr task;
|
||||
try {
|
||||
task = co_await tasks::task_manager::invoke_on_task(tm, id, std::function([] (tasks::task_manager::task_ptr task) {
|
||||
return task->done().then_wrapped([task] (auto f) {
|
||||
task->unregister_task();
|
||||
// done() is called only because we want the task to be complete before getting its status.
|
||||
// The future should be ignored here as the result does not matter.
|
||||
f.ignore_ready_future();
|
||||
return make_foreign(task);
|
||||
});
|
||||
}));
|
||||
} catch (tasks::task_manager::task_not_found& e) {
|
||||
throw bad_param_exception(e.what());
|
||||
}
|
||||
auto task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) {
|
||||
return task->done().then_wrapped([task] (auto f) {
|
||||
task->unregister_task();
|
||||
f.get();
|
||||
return make_foreign(task);
|
||||
});
|
||||
}));
|
||||
auto s = co_await retrieve_status(task);
|
||||
co_return make_status(s);
|
||||
});
|
||||
|
||||
tm::get_task_status_recursively.set(r, [&_tm = tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto& tm = _tm;
|
||||
tm::get_task_status_recursively.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto& _ctx = ctx;
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
std::queue<tasks::task_manager::foreign_task_ptr> q;
|
||||
utils::chunked_vector<full_task_status> res;
|
||||
|
||||
tasks::task_manager::foreign_task_ptr task;
|
||||
try {
|
||||
// Get requested task.
|
||||
task = co_await tasks::task_manager::invoke_on_task(tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
|
||||
if (task->is_complete()) {
|
||||
task->unregister_task();
|
||||
}
|
||||
co_return task;
|
||||
}));
|
||||
} catch (tasks::task_manager::task_not_found& e) {
|
||||
throw bad_param_exception(e.what());
|
||||
}
|
||||
// Get requested task.
|
||||
auto task = co_await tasks::task_manager::invoke_on_task(_ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
|
||||
auto state = task->get_status().state;
|
||||
if (state == tasks::task_manager::task_state::done || state == tasks::task_manager::task_state::failed) {
|
||||
task->unregister_task();
|
||||
}
|
||||
co_return task;
|
||||
}));
|
||||
|
||||
// Push children's statuses in BFS order.
|
||||
q.push(co_await task.copy()); // Task cannot be moved since we need it to be alive during whole loop execution.
|
||||
@@ -255,23 +228,9 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
|
||||
|
||||
tm::get_and_update_ttl.set(r, [&cfg] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
uint32_t ttl = cfg.task_ttl_seconds();
|
||||
try {
|
||||
co_await cfg.task_ttl_seconds.set_value_on_all_shards(req->query_parameters["ttl"], utils::config_file::config_source::API);
|
||||
} catch (...) {
|
||||
throw bad_param_exception(fmt::format("{}", std::current_exception()));
|
||||
}
|
||||
co_await cfg.task_ttl_seconds.set_value_on_all_shards(req->query_parameters["ttl"], utils::config_file::config_source::API);
|
||||
co_return json::json_return_type(ttl);
|
||||
});
|
||||
}
|
||||
|
||||
void unset_task_manager(http_context& ctx, routes& r) {
|
||||
tm::get_modules.unset(r);
|
||||
tm::get_tasks.unset(r);
|
||||
tm::get_task_status.unset(r);
|
||||
tm::abort_task.unset(r);
|
||||
tm::wait_task.unset(r);
|
||||
tm::get_task_status_recursively.unset(r);
|
||||
tm::get_and_update_ttl.unset(r);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -8,17 +8,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include "api.hh"
|
||||
#include "db/config.hh"
|
||||
|
||||
namespace tasks {
|
||||
class task_manager;
|
||||
}
|
||||
|
||||
namespace api {
|
||||
|
||||
void set_task_manager(http_context& ctx, httpd::routes& r, sharded<tasks::task_manager>& tm, db::config& cfg);
|
||||
void unset_task_manager(http_context& ctx, httpd::routes& r);
|
||||
void set_task_manager(http_context& ctx, httpd::routes& r, db::config& cfg);
|
||||
|
||||
}
|
||||
|
||||
@@ -20,17 +20,17 @@ namespace tmt = httpd::task_manager_test_json;
|
||||
using namespace json;
|
||||
using namespace seastar::httpd;
|
||||
|
||||
void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_manager>& tm) {
|
||||
tmt::register_test_module.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
co_await tm.invoke_on_all([] (tasks::task_manager& tm) {
|
||||
void set_task_manager_test(http_context& ctx, routes& r) {
|
||||
tmt::register_test_module.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
co_await ctx.tm.invoke_on_all([] (tasks::task_manager& tm) {
|
||||
auto m = make_shared<tasks::test_module>(tm);
|
||||
tm.register_module("test", m);
|
||||
});
|
||||
co_return json_void();
|
||||
});
|
||||
|
||||
tmt::unregister_test_module.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
co_await tm.invoke_on_all([] (tasks::task_manager& tm) -> future<> {
|
||||
tmt::unregister_test_module.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
co_await ctx.tm.invoke_on_all([] (tasks::task_manager& tm) -> future<> {
|
||||
auto module_name = "test";
|
||||
auto module = tm.find_module(module_name);
|
||||
co_await module->stop();
|
||||
@@ -38,8 +38,8 @@ void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_man
|
||||
co_return json_void();
|
||||
});
|
||||
|
||||
tmt::register_test_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
sharded<tasks::task_manager>& tms = tm;
|
||||
tmt::register_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
sharded<tasks::task_manager>& tms = ctx.tm;
|
||||
auto it = req->query_parameters.find("task_id");
|
||||
auto id = it != req->query_parameters.end() ? tasks::task_id{utils::UUID{it->second}} : tasks::task_id::create_null_id();
|
||||
it = req->query_parameters.find("shard");
|
||||
@@ -54,7 +54,7 @@ void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_man
|
||||
tasks::task_info data;
|
||||
if (it != req->query_parameters.end()) {
|
||||
data.id = tasks::task_id{utils::UUID{it->second}};
|
||||
auto parent_ptr = co_await tasks::task_manager::lookup_task_on_all_shards(tm, data.id);
|
||||
auto parent_ptr = co_await tasks::task_manager::lookup_task_on_all_shards(ctx.tm, data.id);
|
||||
data.shard = parent_ptr->get_status().shard;
|
||||
}
|
||||
|
||||
@@ -69,50 +69,34 @@ void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_man
|
||||
co_return id.to_sstring();
|
||||
});
|
||||
|
||||
tmt::unregister_test_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
tmt::unregister_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = tasks::task_id{utils::UUID{req->query_parameters["task_id"]}};
|
||||
try {
|
||||
co_await tasks::task_manager::invoke_on_task(tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
|
||||
tasks::test_task test_task{task};
|
||||
co_await test_task.unregister_task();
|
||||
});
|
||||
} catch (tasks::task_manager::task_not_found& e) {
|
||||
throw bad_param_exception(e.what());
|
||||
}
|
||||
co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
|
||||
tasks::test_task test_task{task};
|
||||
co_await test_task.unregister_task();
|
||||
});
|
||||
co_return json_void();
|
||||
});
|
||||
|
||||
tmt::finish_test_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
tmt::finish_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
auto it = req->query_parameters.find("error");
|
||||
bool fail = it != req->query_parameters.end();
|
||||
std::string error = fail ? it->second : "";
|
||||
|
||||
try {
|
||||
co_await tasks::task_manager::invoke_on_task(tm, id, [fail, error = std::move(error)] (tasks::task_manager::task_ptr task) {
|
||||
tasks::test_task test_task{task};
|
||||
if (fail) {
|
||||
test_task.finish_failed(std::make_exception_ptr(std::runtime_error(error)));
|
||||
} else {
|
||||
test_task.finish();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
} catch (tasks::task_manager::task_not_found& e) {
|
||||
throw bad_param_exception(e.what());
|
||||
}
|
||||
co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [fail, error = std::move(error)] (tasks::task_manager::task_ptr task) {
|
||||
tasks::test_task test_task{task};
|
||||
if (fail) {
|
||||
test_task.finish_failed(std::make_exception_ptr(std::runtime_error(error)));
|
||||
} else {
|
||||
test_task.finish();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
co_return json_void();
|
||||
});
|
||||
}
|
||||
|
||||
void unset_task_manager_test(http_context& ctx, routes& r) {
|
||||
tmt::register_test_module.unset(r);
|
||||
tmt::unregister_test_module.unset(r);
|
||||
tmt::register_test_task.unset(r);
|
||||
tmt::unregister_test_task.unset(r);
|
||||
tmt::finish_test_task.unset(r);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -10,17 +10,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include "api.hh"
|
||||
|
||||
namespace tasks {
|
||||
class task_manager;
|
||||
}
|
||||
|
||||
namespace api {
|
||||
|
||||
void set_task_manager_test(http_context& ctx, httpd::routes& r, sharded<tasks::task_manager>& tm);
|
||||
void unset_task_manager_test(http_context& ctx, httpd::routes& r);
|
||||
void set_task_manager_test(http_context& ctx, httpd::routes& r);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ target_sources(scylla_auth
|
||||
allow_all_authorizer.cc
|
||||
authenticated_user.cc
|
||||
authenticator.cc
|
||||
certificate_authenticator.cc
|
||||
common.cc
|
||||
default_authorizer.cc
|
||||
password_authenticator.cc
|
||||
@@ -31,7 +30,6 @@ target_link_libraries(scylla_auth
|
||||
PRIVATE
|
||||
cql3
|
||||
idl
|
||||
wasmtime_bindings
|
||||
libxcrypt::libxcrypt)
|
||||
wasmtime_bindings)
|
||||
|
||||
add_whole_archive(auth scylla_auth)
|
||||
|
||||
@@ -18,7 +18,3 @@
|
||||
|
||||
const sstring auth::authenticator::USERNAME_KEY("username");
|
||||
const sstring auth::authenticator::PASSWORD_KEY("password");
|
||||
|
||||
future<std::optional<auth::authenticated_user>> auth::authenticator::authenticate(session_dn_func) const {
|
||||
return make_ready_future<std::optional<auth::authenticated_user>>(std::nullopt);
|
||||
}
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
#include <set>
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
#include <optional>
|
||||
#include <functional>
|
||||
|
||||
#include <seastar/core/enum.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
@@ -38,16 +36,6 @@ namespace auth {
|
||||
|
||||
class authenticated_user;
|
||||
|
||||
// Query alt name info as a single (subject style) string
|
||||
using alt_name_func = std::function<future<std::string>()>;
|
||||
|
||||
struct certificate_info {
|
||||
std::string subject;
|
||||
alt_name_func get_alt_names;
|
||||
};
|
||||
|
||||
using session_dn_func = std::function<future<std::optional<certificate_info>>()>;
|
||||
|
||||
///
|
||||
/// Abstract client for authenticating role identity.
|
||||
///
|
||||
@@ -99,13 +87,6 @@ public:
|
||||
///
|
||||
virtual future<authenticated_user> authenticate(const credentials_map& credentials) const = 0;
|
||||
|
||||
///
|
||||
/// Authenticate (early) using transport info
|
||||
///
|
||||
/// \returns nullopt if not supported/required. exceptional future if failed
|
||||
///
|
||||
virtual future<std::optional<authenticated_user>> authenticate(session_dn_func) const;
|
||||
|
||||
///
|
||||
/// Create an authentication record for a new user. This is required before the user can log-in.
|
||||
///
|
||||
|
||||
@@ -1,181 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
#include "auth/certificate_authenticator.hh"
|
||||
|
||||
#include <regex>
|
||||
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "db/config.hh"
|
||||
|
||||
static const auto CERT_AUTH_NAME = "com.scylladb.auth.CertificateAuthenticator";
|
||||
const std::string_view auth::certificate_authenticator_name(CERT_AUTH_NAME);
|
||||
|
||||
static logging::logger clogger("certificate_authenticator");
|
||||
|
||||
static const std::string cfg_source_attr = "source";
|
||||
static const std::string cfg_query_attr = "query";
|
||||
|
||||
static const std::string cfg_source_subject = "SUBJECT";
|
||||
static const std::string cfg_source_altname = "ALTNAME";
|
||||
|
||||
static const class_registrator<auth::authenticator
|
||||
, auth::certificate_authenticator
|
||||
, cql3::query_processor&
|
||||
, ::service::migration_manager&> cert_auth_reg(CERT_AUTH_NAME);
|
||||
|
||||
enum class auth::certificate_authenticator::query_source {
|
||||
subject, altname
|
||||
};
|
||||
|
||||
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::migration_manager&)
|
||||
: _queries([&] {
|
||||
auto& conf = qp.db().get_config();
|
||||
auto queries = conf.auth_certificate_role_queries();
|
||||
|
||||
if (queries.empty()) {
|
||||
throw std::invalid_argument("No role extraction queries specified.");
|
||||
}
|
||||
|
||||
std::vector<std::pair<query_source, boost::regex>> res;
|
||||
|
||||
for (auto& map : queries) {
|
||||
// first, check for any invalid config keys
|
||||
if (map.size() == 2) {
|
||||
try {
|
||||
auto& source = map.at(cfg_source_attr);
|
||||
std::string query = map.at(cfg_query_attr);
|
||||
|
||||
std::transform(source.begin(), source.end(), source.begin(), ::toupper);
|
||||
|
||||
boost::regex ex(query);
|
||||
if (ex.mark_count() != 1) {
|
||||
throw std::invalid_argument("Role query must have exactly one mark expression");
|
||||
}
|
||||
|
||||
clogger.debug("Append role query: {} : {}", source, query);
|
||||
|
||||
if (source == cfg_source_subject) {
|
||||
res.emplace_back(query_source::subject, std::move(ex));
|
||||
} else if (source == cfg_source_altname) {
|
||||
res.emplace_back(query_source::altname, std::move(ex));
|
||||
} else {
|
||||
throw std::invalid_argument(fmt::format("Invalid source: {}", map.at(cfg_source_attr)));
|
||||
}
|
||||
continue;
|
||||
} catch (std::out_of_range&) {
|
||||
// just fallthrough
|
||||
} catch (std::regex_error&) {
|
||||
std::throw_with_nested(std::invalid_argument(fmt::format("Invalid query expression: {}", map.at(cfg_query_attr))));
|
||||
}
|
||||
}
|
||||
throw std::invalid_argument(fmt::format("Invalid query: {}", map));
|
||||
}
|
||||
return res;
|
||||
}())
|
||||
{}
|
||||
|
||||
auth::certificate_authenticator::~certificate_authenticator() = default;
|
||||
|
||||
future<> auth::certificate_authenticator::start() {
|
||||
co_return;
|
||||
}
|
||||
|
||||
future<> auth::certificate_authenticator::stop() {
|
||||
co_return;
|
||||
}
|
||||
|
||||
std::string_view auth::certificate_authenticator::qualified_java_name() const {
|
||||
return certificate_authenticator_name;
|
||||
}
|
||||
|
||||
bool auth::certificate_authenticator::require_authentication() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
auth::authentication_option_set auth::certificate_authenticator::supported_options() const {
|
||||
return {};
|
||||
}
|
||||
|
||||
auth::authentication_option_set auth::certificate_authenticator::alterable_options() const {
|
||||
return {};
|
||||
}
|
||||
|
||||
future<std::optional<auth::authenticated_user>> auth::certificate_authenticator::authenticate(session_dn_func f) const {
|
||||
if (!f) {
|
||||
co_return std::nullopt;
|
||||
}
|
||||
auto dninfo = co_await f();
|
||||
if (!dninfo) {
|
||||
throw exceptions::authentication_exception("No valid certificate found");
|
||||
}
|
||||
|
||||
auto& subject = dninfo->subject;
|
||||
std::optional<std::string> altname ;
|
||||
|
||||
const std::string* source_str = nullptr;
|
||||
|
||||
for (auto& [source, expr] : _queries) {
|
||||
switch (source) {
|
||||
default:
|
||||
case query_source::subject:
|
||||
source_str = &subject;
|
||||
break;
|
||||
case query_source::altname:
|
||||
if (!altname) {
|
||||
altname = dninfo->get_alt_names ? co_await dninfo->get_alt_names() : std::string{};
|
||||
}
|
||||
source_str = &*altname;
|
||||
break;
|
||||
}
|
||||
|
||||
clogger.debug("Checking {}: {}", int(source), *source_str);
|
||||
|
||||
boost::smatch m;
|
||||
if (boost::regex_search(*source_str, m, expr)) {
|
||||
auto username = m[1].str();
|
||||
clogger.debug("Return username: {}", username);
|
||||
co_return username;
|
||||
}
|
||||
}
|
||||
throw exceptions::authentication_exception(format("Subject '{}'/'{}' does not match any query expression", subject, altname));
|
||||
}
|
||||
|
||||
|
||||
future<auth::authenticated_user> auth::certificate_authenticator::authenticate(const credentials_map&) const {
|
||||
throw exceptions::authentication_exception("Cannot authenticate using attribute map");
|
||||
}
|
||||
|
||||
future<> auth::certificate_authenticator::create(std::string_view role_name, const authentication_options& options) const {
|
||||
// TODO: should we keep track of roles/enforce existence? Role manager should deal with this...
|
||||
co_return;
|
||||
}
|
||||
|
||||
future<> auth::certificate_authenticator::alter(std::string_view role_name, const authentication_options& options) const {
|
||||
co_return;
|
||||
}
|
||||
|
||||
future<> auth::certificate_authenticator::drop(std::string_view role_name) const {
|
||||
co_return;
|
||||
}
|
||||
|
||||
future<auth::custom_options> auth::certificate_authenticator::query_custom_options(std::string_view) const {
|
||||
co_return auth::custom_options{};
|
||||
}
|
||||
|
||||
const auth::resource_set& auth::certificate_authenticator::protected_resources() const {
|
||||
static const resource_set resources;
|
||||
return resources;
|
||||
}
|
||||
|
||||
::shared_ptr<auth::sasl_challenge> auth::certificate_authenticator::new_sasl_challenge() const {
|
||||
throw exceptions::authentication_exception("Login authentication not supported");
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
#include "auth/authenticator.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
class query_processor;
|
||||
|
||||
} // namespace cql3
|
||||
|
||||
namespace service {
|
||||
class migration_manager;
|
||||
}
|
||||
|
||||
namespace auth {
|
||||
|
||||
extern const std::string_view certificate_authenticator_name;
|
||||
|
||||
class certificate_authenticator : public authenticator {
|
||||
enum class query_source;
|
||||
std::vector<std::pair<query_source, boost::regex>> _queries;
|
||||
public:
|
||||
certificate_authenticator(cql3::query_processor&, ::service::migration_manager&);
|
||||
~certificate_authenticator();
|
||||
|
||||
future<> start() override;
|
||||
future<> stop() override;
|
||||
|
||||
std::string_view qualified_java_name() const override;
|
||||
|
||||
bool require_authentication() const override;
|
||||
|
||||
authentication_option_set supported_options() const override;
|
||||
authentication_option_set alterable_options() const override;
|
||||
|
||||
future<authenticated_user> authenticate(const credentials_map& credentials) const override;
|
||||
future<std::optional<authenticated_user>> authenticate(session_dn_func) const override;
|
||||
|
||||
future<> create(std::string_view role_name, const authentication_options& options) const override;
|
||||
future<> alter(std::string_view role_name, const authentication_options& options) const override;
|
||||
future<> drop(std::string_view role_name) const override;
|
||||
|
||||
future<custom_options> query_custom_options(std::string_view role_name) const override;
|
||||
|
||||
const resource_set& protected_resources() const override;
|
||||
|
||||
::shared_ptr<sasl_challenge> new_sasl_challenge() const override;
|
||||
private:
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -71,8 +71,7 @@ static future<> create_metadata_table_if_missing_impl(
|
||||
auto group0_guard = co_await mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
try {
|
||||
co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
|
||||
std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
|
||||
co_return co_await mm.announce(co_await mm.prepare_new_column_family_announcement(table, ts), std::move(group0_guard));
|
||||
} catch (exceptions::already_exists_exception&) {}
|
||||
}
|
||||
}
|
||||
@@ -85,6 +84,20 @@ future<> create_metadata_table_if_missing(
|
||||
return futurize_invoke(create_metadata_table_if_missing_impl, table_name, qp, cql, mm);
|
||||
}
|
||||
|
||||
future<> wait_for_schema_agreement(::service::migration_manager& mm, const replica::database& db, seastar::abort_source& as) {
|
||||
static const auto pause = [] { return sleep(std::chrono::milliseconds(500)); };
|
||||
|
||||
return do_until([&db, &as] {
|
||||
as.check();
|
||||
return db.get_version() != replica::database::empty_version;
|
||||
}, pause).then([&mm, &as] {
|
||||
return do_until([&mm, &as] {
|
||||
as.check();
|
||||
return mm.have_schema_agreement();
|
||||
}, pause);
|
||||
});
|
||||
}
|
||||
|
||||
::service::query_state& internal_distributed_query_state() noexcept {
|
||||
#ifdef DEBUG
|
||||
// Give the much slower debug tests more headroom for completing auth queries.
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "log.hh"
|
||||
#include "seastarx.hh"
|
||||
#include "utils/exponential_backoff_retry.hh"
|
||||
#include "service/query_state.hh"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
@@ -31,7 +32,6 @@ class database;
|
||||
|
||||
namespace service {
|
||||
class migration_manager;
|
||||
class query_state;
|
||||
}
|
||||
|
||||
namespace cql3 {
|
||||
@@ -67,6 +67,8 @@ future<> create_metadata_table_if_missing(
|
||||
std::string_view cql,
|
||||
::service::migration_manager&) noexcept;
|
||||
|
||||
future<> wait_for_schema_agreement(::service::migration_manager&, const replica::database&, seastar::abort_source&);
|
||||
|
||||
///
|
||||
/// Time-outs for internal, non-local CQL queries.
|
||||
///
|
||||
|
||||
@@ -129,7 +129,7 @@ future<> default_authorizer::start() {
|
||||
_migration_manager).then([this] {
|
||||
_finished = do_after_system_ready(_as, [this] {
|
||||
return async([this] {
|
||||
_migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get0();
|
||||
wait_for_schema_agreement(_migration_manager, _qp.db().real_database(), _as).get0();
|
||||
|
||||
if (legacy_metadata_exists()) {
|
||||
if (!any_granted().get0()) {
|
||||
|
||||
@@ -29,7 +29,6 @@
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "db/config.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
@@ -51,23 +50,14 @@ static const class_registrator<
|
||||
|
||||
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
|
||||
|
||||
static std::string_view get_config_value(std::string_view value, std::string_view def) {
|
||||
return value.empty() ? def : value;
|
||||
}
|
||||
|
||||
std::string password_authenticator::default_superuser(const db::config& cfg) {
|
||||
return std::string(get_config_value(cfg.auth_superuser_name(), DEFAULT_USER_NAME));
|
||||
}
|
||||
|
||||
password_authenticator::~password_authenticator() {
|
||||
}
|
||||
|
||||
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::migration_manager& mm)
|
||||
: _qp(qp)
|
||||
, _migration_manager(mm)
|
||||
, _stopped(make_ready_future<>())
|
||||
, _superuser(default_superuser(qp.db().get_config()))
|
||||
{}
|
||||
, _stopped(make_ready_future<>()) {
|
||||
}
|
||||
|
||||
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
|
||||
return !row.get_or<sstring>(SALTED_HASH, "").empty();
|
||||
@@ -116,17 +106,13 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
}
|
||||
|
||||
future<> password_authenticator::create_default_if_missing() const {
|
||||
return default_role_row_satisfies(_qp, &has_salted_hash, _superuser).then([this](bool exists) {
|
||||
return default_role_row_satisfies(_qp, &has_salted_hash).then([this](bool exists) {
|
||||
if (!exists) {
|
||||
std::string salted_pwd(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
|
||||
if (salted_pwd.empty()) {
|
||||
salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt);
|
||||
}
|
||||
return _qp.execute_internal(
|
||||
update_row_query(),
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state(),
|
||||
{salted_pwd, _superuser},
|
||||
{passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt), DEFAULT_USER_NAME},
|
||||
cql3::query_processor::cache_internal::no).then([](auto&&) {
|
||||
plogger.info("Created default superuser authentication record.");
|
||||
});
|
||||
@@ -146,9 +132,9 @@ future<> password_authenticator::start() {
|
||||
|
||||
_stopped = do_after_system_ready(_as, [this] {
|
||||
return async([this] {
|
||||
_migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get0();
|
||||
wait_for_schema_agreement(_migration_manager, _qp.db().real_database(), _as).get0();
|
||||
|
||||
if (any_nondefault_role_row_satisfies(_qp, &has_salted_hash, _superuser).get0()) {
|
||||
if (any_nondefault_role_row_satisfies(_qp, &has_salted_hash).get0()) {
|
||||
if (legacy_metadata_exists()) {
|
||||
plogger.warn("Ignoring legacy authentication metadata since nondefault data already exist.");
|
||||
}
|
||||
@@ -175,8 +161,6 @@ future<> password_authenticator::stop() {
|
||||
}
|
||||
|
||||
db::consistency_level password_authenticator::consistency_for_user(std::string_view role_name) {
|
||||
// TODO: this is plain dung. Why treat hardcoded default special, but for example a user-created
|
||||
// super user uses plain LOCAL_ONE?
|
||||
if (role_name == DEFAULT_USER_NAME) {
|
||||
return db::consistency_level::QUORUM;
|
||||
}
|
||||
|
||||
@@ -14,10 +14,6 @@
|
||||
|
||||
#include "auth/authenticator.hh"
|
||||
|
||||
namespace db {
|
||||
class config;
|
||||
}
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
class query_processor;
|
||||
@@ -37,11 +33,9 @@ class password_authenticator : public authenticator {
|
||||
::service::migration_manager& _migration_manager;
|
||||
future<> _stopped;
|
||||
seastar::abort_source _as;
|
||||
std::string _superuser;
|
||||
|
||||
public:
|
||||
static db::consistency_level consistency_for_user(std::string_view role_name);
|
||||
static std::string default_superuser(const db::config&);
|
||||
|
||||
password_authenticator(cql3::query_processor&, ::service::migration_manager&);
|
||||
|
||||
|
||||
@@ -46,43 +46,60 @@ constexpr std::string_view qualified_name("system_auth.roles");
|
||||
|
||||
future<bool> default_role_row_satisfies(
|
||||
cql3::query_processor& qp,
|
||||
std::function<bool(const cql3::untyped_result_set_row&)> p,
|
||||
std::optional<std::string> rolename) {
|
||||
std::function<bool(const cql3::untyped_result_set_row&)> p) {
|
||||
static const sstring query = format("SELECT * FROM {} WHERE {} = ?",
|
||||
meta::roles_table::qualified_name,
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
for (auto cl : { db::consistency_level::ONE, db::consistency_level::QUORUM }) {
|
||||
auto results = co_await qp.execute_internal(query, cl
|
||||
, internal_distributed_query_state()
|
||||
, {rolename.value_or(std::string(meta::DEFAULT_SUPERUSER_NAME))}
|
||||
, cql3::query_processor::cache_internal::yes
|
||||
);
|
||||
if (!results->empty()) {
|
||||
co_return p(results->one());
|
||||
}
|
||||
}
|
||||
co_return false;
|
||||
return do_with(std::move(p), [&qp](const auto& p) {
|
||||
return qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
internal_distributed_query_state(),
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
cql3::query_processor::cache_internal::yes).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
return qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state(),
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
cql3::query_processor::cache_internal::yes).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
return make_ready_future<bool>(false);
|
||||
}
|
||||
|
||||
return make_ready_future<bool>(p(results->one()));
|
||||
});
|
||||
}
|
||||
|
||||
return make_ready_future<bool>(p(results->one()));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<bool> any_nondefault_role_row_satisfies(
|
||||
cql3::query_processor& qp,
|
||||
std::function<bool(const cql3::untyped_result_set_row&)> p,
|
||||
std::optional<std::string> rolename) {
|
||||
std::function<bool(const cql3::untyped_result_set_row&)> p) {
|
||||
static const sstring query = format("SELECT * FROM {}", meta::roles_table::qualified_name);
|
||||
|
||||
auto results = co_await qp.execute_internal(query, db::consistency_level::QUORUM
|
||||
, internal_distributed_query_state(), cql3::query_processor::cache_internal::no
|
||||
);
|
||||
if (results->empty()) {
|
||||
co_return false;
|
||||
}
|
||||
static const sstring col_name = sstring(meta::roles_table::role_col_name);
|
||||
return do_with(std::move(p), [&qp](const auto& p) {
|
||||
return qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state(),
|
||||
cql3::query_processor::cache_internal::no).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
co_return boost::algorithm::any_of(*results, [&](const cql3::untyped_result_set_row& row) {
|
||||
auto superuser = rolename ? std::string_view(*rolename) : meta::DEFAULT_SUPERUSER_NAME;
|
||||
const bool is_nondefault = row.get_as<sstring>(col_name) != superuser;
|
||||
return is_nondefault && p(row);
|
||||
static const sstring col_name = sstring(meta::roles_table::role_col_name);
|
||||
|
||||
return boost::algorithm::any_of(*results, [&p](const cql3::untyped_result_set_row& row) {
|
||||
const bool is_nondefault = row.get_as<sstring>(col_name) != meta::DEFAULT_SUPERUSER_NAME;
|
||||
return is_nondefault && p(row);
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -43,17 +43,13 @@ constexpr std::string_view role_col_name{"role", 4};
|
||||
///
|
||||
future<bool> default_role_row_satisfies(
|
||||
cql3::query_processor&,
|
||||
std::function<bool(const cql3::untyped_result_set_row&)>,
|
||||
std::optional<std::string> rolename = {}
|
||||
);
|
||||
std::function<bool(const cql3::untyped_result_set_row&)>);
|
||||
|
||||
///
|
||||
/// Check that any nondefault role satisfies a predicate. `false` if no nondefault roles exist.
|
||||
///
|
||||
future<bool> any_nondefault_role_row_satisfies(
|
||||
cql3::query_processor&,
|
||||
std::function<bool(const cql3::untyped_result_set_row&)>,
|
||||
std::optional<std::string> rolename = {}
|
||||
);
|
||||
std::function<bool(const cql3::untyped_result_set_row&)>);
|
||||
|
||||
}
|
||||
|
||||
@@ -178,8 +178,7 @@ future<> service::create_keyspace_if_missing(::service::migration_manager& mm) c
|
||||
opts,
|
||||
true);
|
||||
|
||||
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
|
||||
std::move(group0_guard), format("auth_service: create {} keyspace", meta::AUTH_KS));
|
||||
co_return co_await mm.announce(mm.prepare_new_keyspace_announcement(ksm, ts), std::move(group0_guard));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,8 +28,6 @@
|
||||
#include "log.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "password_authenticator.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
@@ -129,13 +127,6 @@ static bool has_can_login(const cql3::untyped_result_set_row& row) {
|
||||
return row.has("can_login") && !(boolean_type->deserialize(row.get_blob("can_login")).is_null());
|
||||
}
|
||||
|
||||
standard_role_manager::standard_role_manager(cql3::query_processor& qp, ::service::migration_manager& mm)
|
||||
: _qp(qp)
|
||||
, _migration_manager(mm)
|
||||
, _stopped(make_ready_future<>())
|
||||
, _superuser(password_authenticator::default_superuser(qp.db().get_config()))
|
||||
{}
|
||||
|
||||
std::string_view standard_role_manager::qualified_java_name() const noexcept {
|
||||
return "org.apache.cassandra.auth.CassandraRoleManager";
|
||||
}
|
||||
@@ -177,7 +168,7 @@ future<> standard_role_manager::create_metadata_tables_if_missing() const {
|
||||
}
|
||||
|
||||
future<> standard_role_manager::create_default_role_if_missing() const {
|
||||
return default_role_row_satisfies(_qp, &has_can_login, _superuser).then([this](bool exists) {
|
||||
return default_role_row_satisfies(_qp, &has_can_login).then([this](bool exists) {
|
||||
if (!exists) {
|
||||
static const sstring query = format("INSERT INTO {} ({}, is_superuser, can_login) VALUES (?, true, true)",
|
||||
meta::roles_table::qualified_name,
|
||||
@@ -187,9 +178,9 @@ future<> standard_role_manager::create_default_role_if_missing() const {
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state(),
|
||||
{_superuser},
|
||||
cql3::query_processor::cache_internal::no).then([this](auto&&) {
|
||||
log.info("Created default superuser role '{}'.", _superuser);
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
cql3::query_processor::cache_internal::no).then([](auto&&) {
|
||||
log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}
|
||||
@@ -241,7 +232,7 @@ future<> standard_role_manager::start() {
|
||||
return this->create_metadata_tables_if_missing().then([this] {
|
||||
_stopped = auth::do_after_system_ready(_as, [this] {
|
||||
return seastar::async([this] {
|
||||
_migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get0();
|
||||
wait_for_schema_agreement(_migration_manager, _qp.db().real_database(), _as).get0();
|
||||
|
||||
if (any_nondefault_role_row_satisfies(_qp, &has_can_login).get0()) {
|
||||
if (this->legacy_metadata_exists()) {
|
||||
|
||||
@@ -34,10 +34,13 @@ class standard_role_manager final : public role_manager {
|
||||
::service::migration_manager& _migration_manager;
|
||||
future<> _stopped;
|
||||
seastar::abort_source _as;
|
||||
std::string _superuser;
|
||||
|
||||
public:
|
||||
standard_role_manager(cql3::query_processor&, ::service::migration_manager&);
|
||||
standard_role_manager(cql3::query_processor& qp, ::service::migration_manager& mm)
|
||||
: _qp(qp)
|
||||
, _migration_manager(mm)
|
||||
, _stopped(make_ready_future<>()) {
|
||||
}
|
||||
|
||||
virtual std::string_view qualified_java_name() const noexcept override;
|
||||
|
||||
|
||||
@@ -37,8 +37,10 @@
|
||||
// The constants q1 and q2 are used to determine the proportional factor at each stage.
|
||||
class backlog_controller {
|
||||
public:
|
||||
using scheduling_group = seastar::scheduling_group;
|
||||
|
||||
struct scheduling_group {
|
||||
seastar::scheduling_group cpu = default_scheduling_group();
|
||||
seastar::io_priority_class io = default_priority_class();
|
||||
};
|
||||
future<> shutdown() {
|
||||
_update_timer.cancel();
|
||||
return std::move(_inflight_update);
|
||||
@@ -56,11 +58,11 @@ protected:
|
||||
};
|
||||
|
||||
scheduling_group _scheduling_group;
|
||||
timer<> _update_timer;
|
||||
|
||||
std::vector<control_point> _control_points;
|
||||
|
||||
std::function<float()> _current_backlog;
|
||||
timer<> _update_timer;
|
||||
// updating shares for an I/O class may contact another shard and returns a future.
|
||||
future<> _inflight_update;
|
||||
|
||||
@@ -80,9 +82,9 @@ protected:
|
||||
std::vector<control_point> control_points, std::function<float()> backlog,
|
||||
float static_shares = 0)
|
||||
: _scheduling_group(std::move(sg))
|
||||
, _update_timer([this] { adjust(); })
|
||||
, _control_points()
|
||||
, _current_backlog(std::move(backlog))
|
||||
, _update_timer([this] { adjust(); })
|
||||
, _inflight_update(make_ready_future<>())
|
||||
, _static_shares(static_shares)
|
||||
{
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (C) 2023-present ScyllaDB
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
here=$(dirname "$0")
|
||||
exec "$here/../tools/cqlsh/bin/cqlsh" "$@"
|
||||
|
||||
@@ -110,9 +110,6 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
|
||||
flat_mutation_reader_v2* _underlying = nullptr;
|
||||
flat_mutation_reader_v2_opt _underlying_holder;
|
||||
|
||||
gc_clock::time_point _read_time;
|
||||
gc_clock::time_point _gc_before;
|
||||
|
||||
future<> do_fill_buffer();
|
||||
future<> ensure_underlying();
|
||||
void copy_from_cache_to_buffer();
|
||||
@@ -181,20 +178,6 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
|
||||
const schema& table_schema() {
|
||||
return *_snp->schema();
|
||||
}
|
||||
|
||||
gc_clock::time_point get_read_time() {
|
||||
return _read_context.tombstone_gc_state() ? gc_clock::now() : gc_clock::time_point::min();
|
||||
}
|
||||
|
||||
gc_clock::time_point get_gc_before(const schema& schema, dht::decorated_key dk, const gc_clock::time_point query_time) {
|
||||
auto gc_state = _read_context.tombstone_gc_state();
|
||||
if (gc_state) {
|
||||
return gc_state->get_gc_before_for_key(schema.shared_from_this(), dk, query_time);
|
||||
}
|
||||
|
||||
return gc_clock::time_point::min();
|
||||
}
|
||||
|
||||
public:
|
||||
cache_flat_mutation_reader(schema_ptr s,
|
||||
dht::decorated_key dk,
|
||||
@@ -213,8 +196,6 @@ public:
|
||||
, _read_context_holder()
|
||||
, _read_context(ctx) // ctx is owned by the caller, who's responsible for closing it.
|
||||
, _next_row(*_schema, *_snp, false, _read_context.is_reversed())
|
||||
, _read_time(get_read_time())
|
||||
, _gc_before(get_gc_before(*_schema, dk, _read_time))
|
||||
{
|
||||
clogger.trace("csm {}: table={}.{}, reversed={}, snap={}", fmt::ptr(this), _schema->ks_name(), _schema->cf_name(), _read_context.is_reversed(),
|
||||
fmt::ptr(&*_snp));
|
||||
@@ -749,51 +730,9 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
|
||||
}
|
||||
}
|
||||
|
||||
// We add the row to the buffer even when it's full.
|
||||
// This simplifies the code. For more info see #3139.
|
||||
if (_next_row_in_range) {
|
||||
bool remove_row = false;
|
||||
|
||||
if (_read_context.tombstone_gc_state() // do not compact rows when tombstone_gc_state is not set (used in some unit tests)
|
||||
&& !_next_row.dummy()
|
||||
&& _snp->at_latest_version()
|
||||
&& _snp->at_oldest_version()) {
|
||||
deletable_row& row = _next_row.latest_row();
|
||||
tombstone range_tomb = _next_row.range_tombstone_for_row();
|
||||
auto t = row.deleted_at();
|
||||
t.apply(range_tomb);
|
||||
|
||||
auto row_tomb_expired = [&](row_tombstone tomb) {
|
||||
return (tomb && tomb.max_deletion_time() < _gc_before);
|
||||
};
|
||||
|
||||
auto is_row_dead = [&](const deletable_row& row) {
|
||||
auto& m = row.marker();
|
||||
return (!m.is_missing() && m.is_dead(_read_time) && m.deletion_time() < _gc_before);
|
||||
};
|
||||
|
||||
if (row_tomb_expired(t) || is_row_dead(row)) {
|
||||
can_gc_fn always_gc = [&](tombstone) { return true; };
|
||||
const schema& row_schema = _next_row.latest_row_schema();
|
||||
|
||||
_read_context.cache()._tracker.on_row_compacted();
|
||||
|
||||
with_allocator(_snp->region().allocator(), [&] {
|
||||
deletable_row row_copy(row_schema, row);
|
||||
row_copy.compact_and_expire(row_schema, t.tomb(), _read_time, always_gc, _gc_before, nullptr);
|
||||
std::swap(row, row_copy);
|
||||
});
|
||||
remove_row = row.empty();
|
||||
|
||||
auto tomb_expired = [&](tombstone tomb) {
|
||||
return (tomb && tomb.deletion_time < _gc_before);
|
||||
};
|
||||
|
||||
auto latests_range_tomb = _next_row.get_iterator_in_latest_version()->range_tombstone();
|
||||
if (tomb_expired(latests_range_tomb)) {
|
||||
_next_row.get_iterator_in_latest_version()->set_range_tombstone({});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (_next_row.range_tombstone_for_row() != _current_tombstone) [[unlikely]] {
|
||||
auto tomb = _next_row.range_tombstone_for_row();
|
||||
auto new_lower_bound = position_in_partition::before_key(_next_row.position());
|
||||
@@ -803,31 +742,8 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
|
||||
_current_tombstone = tomb;
|
||||
_read_context.cache()._tracker.on_range_tombstone_read();
|
||||
}
|
||||
|
||||
if (remove_row) {
|
||||
_read_context.cache()._tracker.on_row_compacted_away();
|
||||
|
||||
_lower_bound = position_in_partition::after_key(*_schema, _next_row.position());
|
||||
|
||||
partition_snapshot_row_weakref row_ref(_next_row);
|
||||
move_to_next_entry();
|
||||
|
||||
with_allocator(_snp->region().allocator(), [&] {
|
||||
cache_tracker& tracker = _read_context.cache()._tracker;
|
||||
if (row_ref->is_linked()) {
|
||||
tracker.get_lru().remove(*row_ref);
|
||||
}
|
||||
row_ref->on_evicted(tracker);
|
||||
});
|
||||
|
||||
_snp->region().allocator().invalidate_references();
|
||||
_next_row.force_valid();
|
||||
} else {
|
||||
// We add the row to the buffer even when it's full.
|
||||
// This simplifies the code. For more info see #3139.
|
||||
add_to_buffer(_next_row);
|
||||
move_to_next_entry();
|
||||
}
|
||||
add_to_buffer(_next_row);
|
||||
move_to_next_entry();
|
||||
} else {
|
||||
move_to_next_range();
|
||||
}
|
||||
@@ -978,7 +894,7 @@ void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_curs
|
||||
if (!row.dummy()) {
|
||||
_read_context.cache().on_row_hit();
|
||||
if (_read_context.digest_requested()) {
|
||||
row.latest_row_prepare_hash();
|
||||
row.latest_row().cells().prepare_hash(table_schema(), column_kind::regular_column);
|
||||
}
|
||||
add_clustering_row_to_buffer(mutation_fragment_v2(*_schema, _permit, row.row()));
|
||||
} else {
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
#include <seastar/core/sleep.hh>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
|
||||
#include "gms/endpoint_state.hh"
|
||||
#include "keys.hh"
|
||||
#include "schema/schema_builder.hh"
|
||||
#include "replica/database.hh"
|
||||
@@ -26,7 +25,6 @@
|
||||
#include "gms/inet_address.hh"
|
||||
#include "gms/gossiper.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "utils/UUID_gen.hh"
|
||||
|
||||
#include "cdc/generation.hh"
|
||||
@@ -68,10 +66,10 @@ static constexpr auto stream_id_index_shift = stream_id_version_shift + stream_i
|
||||
static constexpr auto stream_id_random_shift = stream_id_index_shift + stream_id_index_bits;
|
||||
|
||||
/**
|
||||
* Responsibility for encoding stream_id moved from the create_stream_ids
|
||||
* function to this constructor, to keep knowledge of composition in a
|
||||
* single place. Note the make_new_generation_description function
|
||||
* defines the "order" in which we view vnodes etc.
|
||||
* Responsibilty for encoding stream_id moved from factory method to
|
||||
* this constructor, to keep knowledge of composition in a single place.
|
||||
* Note this is private and friended to topology_description_generator,
|
||||
* because he is the one who defined the "order" we view vnodes etc.
|
||||
*/
|
||||
stream_id::stream_id(dht::token token, size_t vnode_index)
|
||||
: _value(bytes::initialized_later(), 2 * sizeof(int64_t))
|
||||
@@ -155,18 +153,18 @@ bool token_range_description::operator==(const token_range_description& o) const
|
||||
&& sharding_ignore_msb == o.sharding_ignore_msb;
|
||||
}
|
||||
|
||||
topology_description::topology_description(utils::chunked_vector<token_range_description> entries)
|
||||
topology_description::topology_description(std::vector<token_range_description> entries)
|
||||
: _entries(std::move(entries)) {}
|
||||
|
||||
bool topology_description::operator==(const topology_description& o) const {
|
||||
return _entries == o._entries;
|
||||
}
|
||||
|
||||
const utils::chunked_vector<token_range_description>& topology_description::entries() const& {
|
||||
const std::vector<token_range_description>& topology_description::entries() const& {
|
||||
return _entries;
|
||||
}
|
||||
|
||||
utils::chunked_vector<token_range_description>&& topology_description::entries() && {
|
||||
std::vector<token_range_description>&& topology_description::entries() && {
|
||||
return std::move(_entries);
|
||||
}
|
||||
|
||||
@@ -185,48 +183,98 @@ static std::vector<stream_id> create_stream_ids(
|
||||
return result;
|
||||
}
|
||||
|
||||
class topology_description_generator final {
|
||||
const std::unordered_set<dht::token>& _bootstrap_tokens;
|
||||
const locator::token_metadata_ptr _tmptr;
|
||||
const noncopyable_function<std::pair<size_t, uint8_t> (dht::token)>& _get_sharding_info;
|
||||
|
||||
// Compute a set of tokens that split the token ring into vnodes
|
||||
auto get_tokens() const {
|
||||
auto tokens = _tmptr->sorted_tokens();
|
||||
auto it = tokens.insert(
|
||||
tokens.end(), _bootstrap_tokens.begin(), _bootstrap_tokens.end());
|
||||
std::sort(it, tokens.end());
|
||||
std::inplace_merge(tokens.begin(), it, tokens.end());
|
||||
tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end());
|
||||
return tokens;
|
||||
}
|
||||
|
||||
token_range_description create_description(size_t index, dht::token start, dht::token end) const {
|
||||
token_range_description desc;
|
||||
|
||||
desc.token_range_end = end;
|
||||
|
||||
auto [shard_count, ignore_msb] = _get_sharding_info(end);
|
||||
desc.streams = create_stream_ids(index, start, end, shard_count, ignore_msb);
|
||||
desc.sharding_ignore_msb = ignore_msb;
|
||||
|
||||
return desc;
|
||||
}
|
||||
public:
|
||||
topology_description_generator(
|
||||
const std::unordered_set<dht::token>& bootstrap_tokens,
|
||||
const locator::token_metadata_ptr tmptr,
|
||||
// This function must return sharding parameters for a node that owns the vnode ending with
|
||||
// the given token. Returns <shard_count, ignore_msb> pair.
|
||||
const noncopyable_function<std::pair<size_t, uint8_t> (dht::token)>& get_sharding_info)
|
||||
: _bootstrap_tokens(bootstrap_tokens)
|
||||
, _tmptr(std::move(tmptr))
|
||||
, _get_sharding_info(get_sharding_info)
|
||||
{}
|
||||
|
||||
/*
|
||||
* Generate a set of CDC stream identifiers such that for each shard
|
||||
* and vnode pair there exists a stream whose token falls into this vnode
|
||||
* and is owned by this shard. It is sometimes not possible to generate
|
||||
* a CDC stream identifier for some (vnode, shard) pair because not all
|
||||
* shards have to own tokens in a vnode. Small vnode can be totally owned
|
||||
* by a single shard. In such case, a stream identifier that maps to
|
||||
* end of the vnode is generated.
|
||||
*
|
||||
* Then build a cdc::topology_description which maps tokens to generated
|
||||
* stream identifiers, such that if token T is owned by shard S in vnode V,
|
||||
* it gets mapped to the stream identifier generated for (S, V).
|
||||
*/
|
||||
// Run in seastar::async context.
|
||||
topology_description generate() const {
|
||||
const auto tokens = get_tokens();
|
||||
|
||||
std::vector<token_range_description> vnode_descriptions;
|
||||
vnode_descriptions.reserve(tokens.size());
|
||||
|
||||
vnode_descriptions.push_back(
|
||||
create_description(0, tokens.back(), tokens.front()));
|
||||
for (size_t idx = 1; idx < tokens.size(); ++idx) {
|
||||
vnode_descriptions.push_back(
|
||||
create_description(idx, tokens[idx - 1], tokens[idx]));
|
||||
}
|
||||
|
||||
return {std::move(vnode_descriptions)};
|
||||
}
|
||||
};
|
||||
|
||||
bool should_propose_first_generation(const gms::inet_address& me, const gms::gossiper& g) {
|
||||
auto my_host_id = g.get_host_id(me);
|
||||
return g.for_each_endpoint_state_until([&] (const gms::inet_address& node, const gms::endpoint_state& eps) {
|
||||
return stop_iteration(my_host_id < g.get_host_id(node));
|
||||
}) == stop_iteration::no;
|
||||
auto& eps = g.get_endpoint_states();
|
||||
return std::none_of(eps.begin(), eps.end(),
|
||||
[&] (const std::pair<gms::inet_address, gms::endpoint_state>& ep) {
|
||||
return my_host_id < g.get_host_id(ep.first);
|
||||
});
|
||||
}
|
||||
|
||||
bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm) {
|
||||
if (tm.sorted_tokens().size() != gen.entries().size()) {
|
||||
// We probably have garbage streams from old generations
|
||||
cdc_log.info("Generation size does not match the token ring");
|
||||
return false;
|
||||
} else {
|
||||
std::unordered_set<dht::token> gen_ends;
|
||||
for (const auto& entry : gen.entries()) {
|
||||
gen_ends.insert(entry.token_range_end);
|
||||
}
|
||||
for (const auto& metadata_token : tm.sorted_tokens()) {
|
||||
if (!gen_ends.contains(metadata_token)) {
|
||||
cdc_log.warn("CDC generation missing token {}", metadata_token);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static future<utils::chunked_vector<mutation>> get_common_cdc_generation_mutations(
|
||||
future<utils::chunked_vector<mutation>> get_cdc_generation_mutations(
|
||||
schema_ptr s,
|
||||
const partition_key& pkey,
|
||||
noncopyable_function<clustering_key (dht::token)>&& get_ckey_from_range_end,
|
||||
utils::UUID id,
|
||||
const cdc::topology_description& desc,
|
||||
size_t mutation_size_threshold,
|
||||
api::timestamp_type ts) {
|
||||
utils::chunked_vector<mutation> res;
|
||||
res.emplace_back(s, pkey);
|
||||
res.emplace_back(s, partition_key::from_singular(*s, id));
|
||||
res.back().set_static_cell(to_bytes("num_ranges"), int32_t(desc.entries().size()), ts);
|
||||
size_t size_estimate = 0;
|
||||
size_t total_size_estimate = 0;
|
||||
for (auto& e : desc.entries()) {
|
||||
if (size_estimate >= mutation_size_threshold) {
|
||||
total_size_estimate += size_estimate;
|
||||
res.emplace_back(s, pkey);
|
||||
res.emplace_back(s, partition_key::from_singular(*s, id));
|
||||
size_estimate = 0;
|
||||
}
|
||||
|
||||
@@ -237,60 +285,16 @@ static future<utils::chunked_vector<mutation>> get_common_cdc_generation_mutatio
|
||||
}
|
||||
|
||||
size_estimate += e.streams.size() * 20;
|
||||
auto ckey = get_ckey_from_range_end(e.token_range_end);
|
||||
auto ckey = clustering_key::from_singular(*s, dht::token::to_int64(e.token_range_end));
|
||||
res.back().set_cell(ckey, to_bytes("streams"), make_set_value(db::cdc_streams_set_type, std::move(streams)), ts);
|
||||
res.back().set_cell(ckey, to_bytes("ignore_msb"), int8_t(e.sharding_ignore_msb), ts);
|
||||
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
|
||||
total_size_estimate += size_estimate;
|
||||
|
||||
// Copy mutations n times, where n is picked so that the memory size of all mutations together exceeds `max_command_size`.
|
||||
utils::get_local_injector().inject("cdc_generation_mutations_replication", [&res, total_size_estimate, mutation_size_threshold] {
|
||||
utils::chunked_vector<mutation> new_res;
|
||||
|
||||
size_t number_of_copies = (mutation_size_threshold / total_size_estimate + 1) * 2;
|
||||
for (size_t i = 0; i < number_of_copies; ++i) {
|
||||
std::copy(res.begin(), res.end(), std::back_inserter(new_res));
|
||||
}
|
||||
|
||||
res = std::move(new_res);
|
||||
});
|
||||
|
||||
co_return res;
|
||||
}
|
||||
|
||||
future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v2(
|
||||
schema_ptr s,
|
||||
utils::UUID id,
|
||||
const cdc::topology_description& desc,
|
||||
size_t mutation_size_threshold,
|
||||
api::timestamp_type ts) {
|
||||
auto pkey = partition_key::from_singular(*s, id);
|
||||
auto get_ckey = [s] (dht::token range_end) {
|
||||
return clustering_key::from_singular(*s, dht::token::to_int64(range_end));
|
||||
};
|
||||
|
||||
auto res = co_await get_common_cdc_generation_mutations(s, pkey, std::move(get_ckey), desc, mutation_size_threshold, ts);
|
||||
res.back().set_static_cell(to_bytes("num_ranges"), int32_t(desc.entries().size()), ts);
|
||||
co_return res;
|
||||
}
|
||||
|
||||
future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v3(
|
||||
schema_ptr s,
|
||||
utils::UUID id,
|
||||
const cdc::topology_description& desc,
|
||||
size_t mutation_size_threshold,
|
||||
api::timestamp_type ts) {
|
||||
auto pkey = partition_key::from_singular(*s, CDC_GENERATIONS_V3_KEY);
|
||||
auto get_ckey = [&] (dht::token range_end) {
|
||||
return clustering_key::from_exploded(*s, {timeuuid_type->decompose(id), long_type->decompose(dht::token::to_int64(range_end))}) ;
|
||||
};
|
||||
|
||||
co_return co_await get_common_cdc_generation_mutations(s, pkey, std::move(get_ckey), desc, mutation_size_threshold, ts);
|
||||
}
|
||||
|
||||
// non-static for testing
|
||||
size_t limit_of_streams_in_topology_description() {
|
||||
// Each stream takes 16B and we don't want to exceed 4MB so we can have
|
||||
@@ -323,47 +327,13 @@ topology_description limit_number_of_streams_if_needed(topology_description&& de
|
||||
return topology_description(std::move(entries));
|
||||
}
|
||||
|
||||
// Compute a set of tokens that split the token ring into vnodes.
|
||||
static auto get_tokens(const std::unordered_set<dht::token>& bootstrap_tokens, const locator::token_metadata_ptr tmptr) {
|
||||
auto tokens = tmptr->sorted_tokens();
|
||||
auto it = tokens.insert(tokens.end(), bootstrap_tokens.begin(), bootstrap_tokens.end());
|
||||
std::sort(it, tokens.end());
|
||||
std::inplace_merge(tokens.begin(), it, tokens.end());
|
||||
tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end());
|
||||
return tokens;
|
||||
}
|
||||
|
||||
static token_range_description create_token_range_description(
|
||||
size_t index,
|
||||
dht::token start,
|
||||
dht::token end,
|
||||
const noncopyable_function<std::pair<size_t, uint8_t> (dht::token)>& get_sharding_info) {
|
||||
token_range_description desc;
|
||||
|
||||
desc.token_range_end = end;
|
||||
|
||||
auto [shard_count, ignore_msb] = get_sharding_info(end);
|
||||
desc.streams = create_stream_ids(index, start, end, shard_count, ignore_msb);
|
||||
desc.sharding_ignore_msb = ignore_msb;
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
cdc::topology_description make_new_generation_description(
|
||||
std::pair<utils::UUID, cdc::topology_description> make_new_generation_data(
|
||||
const std::unordered_set<dht::token>& bootstrap_tokens,
|
||||
const noncopyable_function<std::pair<size_t, uint8_t>(dht::token)>& get_sharding_info,
|
||||
const locator::token_metadata_ptr tmptr) {
|
||||
const auto tokens = get_tokens(bootstrap_tokens, tmptr);
|
||||
|
||||
utils::chunked_vector<token_range_description> vnode_descriptions;
|
||||
vnode_descriptions.reserve(tokens.size());
|
||||
|
||||
vnode_descriptions.push_back(create_token_range_description(0, tokens.back(), tokens.front(), get_sharding_info));
|
||||
for (size_t idx = 1; idx < tokens.size(); ++idx) {
|
||||
vnode_descriptions.push_back(create_token_range_description(idx, tokens[idx - 1], tokens[idx], get_sharding_info));
|
||||
}
|
||||
|
||||
return {std::move(vnode_descriptions)};
|
||||
auto gen = topology_description_generator(bootstrap_tokens, tmptr, get_sharding_info).generate();
|
||||
auto uuid = utils::make_random_uuid();
|
||||
return {uuid, std::move(gen)};
|
||||
}
|
||||
|
||||
db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milliseconds ring_delay) {
|
||||
@@ -395,9 +365,7 @@ future<cdc::generation_id> generation_service::legacy_make_new_generation(const
|
||||
return {sc > 0 ? sc : 1, get_sharding_ignore_msb(*endpoint, _gossiper)};
|
||||
}
|
||||
};
|
||||
|
||||
auto uuid = utils::make_random_uuid();
|
||||
auto gen = make_new_generation_description(bootstrap_tokens, get_sharding_info, tmptr);
|
||||
auto [uuid, gen] = make_new_generation_data(bootstrap_tokens, get_sharding_info, tmptr);
|
||||
|
||||
// Our caller should ensure that there are normal tokens in the token ring.
|
||||
auto normal_token_owners = tmptr->count_normal_token_owners();
|
||||
@@ -451,12 +419,8 @@ future<cdc::generation_id> generation_service::legacy_make_new_generation(const
|
||||
* but if the cluster already supports CDC, then every newly joining node will propose a new CDC generation,
|
||||
* which means it will gossip the generation's timestamp.
|
||||
*/
|
||||
static std::optional<cdc::generation_id> get_generation_id_for(const gms::inet_address& endpoint, const gms::endpoint_state& eps) {
|
||||
const auto* gen_id_ptr = eps.get_application_state_ptr(gms::application_state::CDC_GENERATION_ID);
|
||||
if (!gen_id_ptr) {
|
||||
return std::nullopt;
|
||||
}
|
||||
auto gen_id_string = gen_id_ptr->value();
|
||||
static std::optional<cdc::generation_id> get_generation_id_for(const gms::inet_address& endpoint, const gms::gossiper& g) {
|
||||
auto gen_id_string = g.get_application_state_value(endpoint, gms::application_state::CDC_GENERATION_ID);
|
||||
cdc_log.trace("endpoint={}, gen_id_string={}", endpoint, gen_id_string);
|
||||
return gms::versioned_value::cdc_generation_id_from_string(gen_id_string);
|
||||
}
|
||||
@@ -660,21 +624,21 @@ future<> generation_service::maybe_rewrite_streams_descriptions() {
|
||||
|
||||
// For each CDC log table get the TTL setting (from CDC options) and the table's creation time
|
||||
std::vector<time_and_ttl> times_and_ttls;
|
||||
_db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> t) {
|
||||
auto& s = *t->schema();
|
||||
for (auto& [_, cf] : _db.get_column_families()) {
|
||||
auto& s = *cf->schema();
|
||||
auto base = cdc::get_base_table(_db, s.ks_name(), s.cf_name());
|
||||
if (!base) {
|
||||
// Not a CDC log table.
|
||||
return;
|
||||
continue;
|
||||
}
|
||||
auto& cdc_opts = base->cdc_options();
|
||||
if (!cdc_opts.enabled()) {
|
||||
// This table is named like a CDC log table but it's not one.
|
||||
return;
|
||||
continue;
|
||||
}
|
||||
|
||||
times_and_ttls.push_back(time_and_ttl{as_timepoint(s.id().uuid()), cdc_opts.ttl()});
|
||||
});
|
||||
}
|
||||
|
||||
if (times_and_ttls.empty()) {
|
||||
// There's no point in rewriting old generations' streams (they don't contain any data).
|
||||
@@ -762,8 +726,8 @@ future<> generation_service::stop() {
|
||||
cdc_log.error("CDC stream rewrite failed: ", std::current_exception());
|
||||
}
|
||||
|
||||
if (_joined && (this_shard_id() == 0)) {
|
||||
co_await leave_ring();
|
||||
if (this_shard_id() == 0) {
|
||||
co_await _gossiper.unregister_(shared_from_this());
|
||||
}
|
||||
|
||||
_stopped = true;
|
||||
@@ -775,6 +739,7 @@ generation_service::~generation_service() {
|
||||
|
||||
future<> generation_service::after_join(std::optional<cdc::generation_id>&& startup_gen_id) {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
assert(_sys_ks.local().bootstrap_complete());
|
||||
|
||||
_gen_id = std::move(startup_gen_id);
|
||||
_gossiper.register_(shared_from_this());
|
||||
@@ -792,24 +757,18 @@ future<> generation_service::after_join(std::optional<cdc::generation_id>&& star
|
||||
_cdc_streams_rewrite_complete = maybe_rewrite_streams_descriptions();
|
||||
}
|
||||
|
||||
future<> generation_service::leave_ring() {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
_joined = false;
|
||||
co_await _gossiper.unregister_(shared_from_this());
|
||||
}
|
||||
|
||||
future<> generation_service::on_join(gms::inet_address ep, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
|
||||
future<> generation_service::on_join(gms::inet_address ep, gms::endpoint_state ep_state) {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
|
||||
auto val = ep_state->get_application_state_ptr(gms::application_state::CDC_GENERATION_ID);
|
||||
auto val = ep_state.get_application_state_ptr(gms::application_state::CDC_GENERATION_ID);
|
||||
if (!val) {
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
return on_change(ep, gms::application_state::CDC_GENERATION_ID, *val, pid);
|
||||
return on_change(ep, gms::application_state::CDC_GENERATION_ID, *val);
|
||||
}
|
||||
|
||||
future<> generation_service::on_change(gms::inet_address ep, gms::application_state app_state, const gms::versioned_value& v, gms::permit_id) {
|
||||
future<> generation_service::on_change(gms::inet_address ep, gms::application_state app_state, const gms::versioned_value& v) {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
|
||||
if (app_state != gms::application_state::CDC_GENERATION_ID) {
|
||||
@@ -829,21 +788,22 @@ future<> generation_service::check_and_repair_cdc_streams() {
|
||||
}
|
||||
|
||||
std::optional<cdc::generation_id> latest = _gen_id;
|
||||
_gossiper.for_each_endpoint_state([&] (const gms::inet_address& addr, const gms::endpoint_state& state) {
|
||||
const auto& endpoint_states = _gossiper.get_endpoint_states();
|
||||
for (const auto& [addr, state] : endpoint_states) {
|
||||
if (_gossiper.is_left(addr)) {
|
||||
cdc_log.info("check_and_repair_cdc_streams ignored node {} because it is in LEFT state", addr);
|
||||
return;
|
||||
continue;
|
||||
}
|
||||
if (!_gossiper.is_normal(addr)) {
|
||||
throw std::runtime_error(format("All nodes must be in NORMAL or LEFT state while performing check_and_repair_cdc_streams"
|
||||
" ({} is in state {})", addr, _gossiper.get_gossip_status(state)));
|
||||
}
|
||||
|
||||
const auto gen_id = get_generation_id_for(addr, state);
|
||||
const auto gen_id = get_generation_id_for(addr, _gossiper);
|
||||
if (!latest || (gen_id && get_ts(*gen_id) > get_ts(*latest))) {
|
||||
latest = gen_id;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
auto tmptr = _token_metadata.get();
|
||||
auto sys_dist_ks = get_sys_dist_ks();
|
||||
@@ -898,9 +858,24 @@ future<> generation_service::check_and_repair_cdc_streams() {
|
||||
" even though some node gossiped about it.",
|
||||
latest, db_clock::now());
|
||||
should_regenerate = true;
|
||||
} else if (!is_cdc_generation_optimal(*gen, *tmptr)) {
|
||||
should_regenerate = true;
|
||||
cdc_log.info("CDC generation {} needs repair, regenerating", latest);
|
||||
} else {
|
||||
if (tmptr->sorted_tokens().size() != gen->entries().size()) {
|
||||
// We probably have garbage streams from old generations
|
||||
cdc_log.info("Generation size does not match the token ring, regenerating");
|
||||
should_regenerate = true;
|
||||
} else {
|
||||
std::unordered_set<dht::token> gen_ends;
|
||||
for (const auto& entry : gen->entries()) {
|
||||
gen_ends.insert(entry.token_range_end);
|
||||
}
|
||||
for (const auto& metadata_token : tmptr->sorted_tokens()) {
|
||||
if (!gen_ends.contains(metadata_token)) {
|
||||
cdc_log.warn("CDC generation {} missing token {}. Regenerating.", latest, metadata_token);
|
||||
should_regenerate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -960,13 +935,17 @@ future<> generation_service::legacy_handle_cdc_generation(std::optional<cdc::gen
|
||||
co_return;
|
||||
}
|
||||
|
||||
if (!_sys_dist_ks.local_is_initialized() || !_sys_dist_ks.local().started()) {
|
||||
on_internal_error(cdc_log, "Legacy handle CDC generation with sys.dist.ks. down");
|
||||
if (!_sys_ks.local().bootstrap_complete() || !_sys_dist_ks.local_is_initialized()
|
||||
|| !_sys_dist_ks.local().started()) {
|
||||
// The service should not be listening for generation changes until after the node
|
||||
// is bootstrapped. Therefore we would previously assume that this condition
|
||||
// can never become true and call on_internal_error here, but it turns out that
|
||||
// it may become true on decommission: the node enters NEEDS_BOOTSTRAP
|
||||
// state before leaving the token ring, so bootstrap_complete() becomes false.
|
||||
// In that case we can simply return.
|
||||
co_return;
|
||||
}
|
||||
|
||||
// The service should not be listening for generation changes until after the node
|
||||
// is bootstrapped and since the node leaves the ring on decommission
|
||||
|
||||
if (co_await container().map_reduce(and_reducer(), [ts = get_ts(*gen_id)] (generation_service& svc) {
|
||||
return !svc._cdc_metadata.prepare(ts);
|
||||
})) {
|
||||
@@ -1029,12 +1008,12 @@ future<> generation_service::legacy_scan_cdc_generations() {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
|
||||
std::optional<cdc::generation_id> latest;
|
||||
_gossiper.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state& eps) {
|
||||
auto gen_id = get_generation_id_for(node, eps);
|
||||
for (const auto& ep: _gossiper.get_endpoint_states()) {
|
||||
auto gen_id = get_generation_id_for(ep.first, _gossiper);
|
||||
if (!latest || (gen_id && get_ts(*gen_id) > get_ts(*latest))) {
|
||||
latest = gen_id;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (latest) {
|
||||
cdc_log.info("Latest generation seen during startup: {}", *latest);
|
||||
|
||||
@@ -92,13 +92,13 @@ struct token_range_description {
|
||||
* in the `_entries` vector. See the comment above `token_range_description` for explanation.
|
||||
*/
|
||||
class topology_description {
|
||||
utils::chunked_vector<token_range_description> _entries;
|
||||
std::vector<token_range_description> _entries;
|
||||
public:
|
||||
topology_description(utils::chunked_vector<token_range_description> entries);
|
||||
topology_description(std::vector<token_range_description> entries);
|
||||
bool operator==(const topology_description&) const;
|
||||
|
||||
const utils::chunked_vector<token_range_description>& entries() const&;
|
||||
utils::chunked_vector<token_range_description>&& entries() &&;
|
||||
const std::vector<token_range_description>& entries() const&;
|
||||
std::vector<token_range_description>&& entries() &&;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -133,28 +133,7 @@ public:
|
||||
*/
|
||||
bool should_propose_first_generation(const gms::inet_address& me, const gms::gossiper&);
|
||||
|
||||
/*
|
||||
* Checks if the CDC generation is optimal, which is true if its `topology_description` is consistent
|
||||
* with `token_metadata`.
|
||||
*/
|
||||
bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm);
|
||||
|
||||
/*
|
||||
* Generate a set of CDC stream identifiers such that for each shard
|
||||
* and vnode pair there exists a stream whose token falls into this vnode
|
||||
* and is owned by this shard. It is sometimes not possible to generate
|
||||
* a CDC stream identifier for some (vnode, shard) pair because not all
|
||||
* shards have to own tokens in a vnode. Small vnode can be totally owned
|
||||
* by a single shard. In such case, a stream identifier that maps to
|
||||
* end of the vnode is generated.
|
||||
*
|
||||
* Then build a cdc::topology_description which maps tokens to generated
|
||||
* stream identifiers, such that if token T is owned by shard S in vnode V,
|
||||
* it gets mapped to the stream identifier generated for (S, V).
|
||||
*
|
||||
* Run in seastar::async context.
|
||||
*/
|
||||
cdc::topology_description make_new_generation_description(
|
||||
std::pair<utils::UUID, cdc::topology_description> make_new_generation_data(
|
||||
const std::unordered_set<dht::token>& bootstrap_tokens,
|
||||
const noncopyable_function<std::pair<size_t, uint8_t> (dht::token)>& get_sharding_info,
|
||||
const locator::token_metadata_ptr);
|
||||
@@ -165,20 +144,9 @@ db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milli
|
||||
// using `mutation_size_threshold` to decide on the mutation sizes. The partition key of each mutation
|
||||
// is given by `gen_uuid`. The timestamp of each cell in each mutation is given by `mutation_timestamp`.
|
||||
//
|
||||
// Works only for the CDC_GENERATIONS_V2 schema (in system_distributed keyspace).
|
||||
future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v2(
|
||||
schema_ptr, utils::UUID gen_uuid, const cdc::topology_description&,
|
||||
size_t mutation_size_threshold, api::timestamp_type mutation_timestamp);
|
||||
|
||||
// The partition key of all rows in the single-partition CDC_GENERATIONS_V3 schema (in system keyspace).
|
||||
static constexpr auto CDC_GENERATIONS_V3_KEY = "cdc_generations";
|
||||
|
||||
// Translates the CDC generation data given by a `cdc::topology_description` into a vector of mutations,
|
||||
// using `mutation_size_threshold` to decide on the mutation sizes. The first clustering key column is
|
||||
// given by `gen_uuid`. The timestamp of each cell in each mutation is given by `mutation_timestamp`.
|
||||
//
|
||||
// Works only for the CDC_GENERATIONS_V3 schema (in system keyspace).
|
||||
future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v3(
|
||||
// Works for only specific schemas: CDC_GENERATIONS_V2 (in system_distributed_keyspace)
|
||||
// and CDC_GENERATIONS_V3 (in system_keyspace).
|
||||
future<utils::chunked_vector<mutation>> get_cdc_generation_mutations(
|
||||
schema_ptr, utils::UUID gen_uuid, const cdc::topology_description&,
|
||||
size_t mutation_size_threshold, api::timestamp_type mutation_timestamp);
|
||||
|
||||
|
||||
@@ -98,20 +98,19 @@ public:
|
||||
* Must be called on shard 0 - that's where the generation management happens.
|
||||
*/
|
||||
future<> after_join(std::optional<cdc::generation_id>&& startup_gen_id);
|
||||
future<> leave_ring();
|
||||
|
||||
cdc::metadata& get_cdc_metadata() {
|
||||
return _cdc_metadata;
|
||||
}
|
||||
|
||||
virtual future<> before_change(gms::inet_address, gms::endpoint_state_ptr, gms::application_state, const gms::versioned_value&) override { return make_ready_future(); }
|
||||
virtual future<> on_alive(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
|
||||
virtual future<> on_dead(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
|
||||
virtual future<> on_remove(gms::inet_address, gms::permit_id) override { return make_ready_future(); }
|
||||
virtual future<> on_restart(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
|
||||
virtual future<> before_change(gms::inet_address, gms::endpoint_state, gms::application_state, const gms::versioned_value&) override { return make_ready_future(); }
|
||||
virtual future<> on_alive(gms::inet_address, gms::endpoint_state) override { return make_ready_future(); }
|
||||
virtual future<> on_dead(gms::inet_address, gms::endpoint_state) override { return make_ready_future(); }
|
||||
virtual future<> on_remove(gms::inet_address) override { return make_ready_future(); }
|
||||
virtual future<> on_restart(gms::inet_address, gms::endpoint_state) override { return make_ready_future(); }
|
||||
|
||||
virtual future<> on_join(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override;
|
||||
virtual future<> on_change(gms::inet_address, gms::application_state, const gms::versioned_value&, gms::permit_id) override;
|
||||
virtual future<> on_join(gms::inet_address, gms::endpoint_state) override;
|
||||
virtual future<> on_change(gms::inet_address, gms::application_state, const gms::versioned_value&) override;
|
||||
|
||||
future<> check_and_repair_cdc_streams();
|
||||
|
||||
|
||||
@@ -160,7 +160,7 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
void on_before_create_column_family(const keyspace_metadata& ksm, const schema& schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
|
||||
void on_before_create_column_family(const schema& schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
|
||||
if (schema.cdc_options().enabled()) {
|
||||
auto& db = _ctxt._proxy.get_db().local();
|
||||
auto logname = log_name(schema.cf_name());
|
||||
|
||||
@@ -40,7 +40,7 @@ static cdc::stream_id get_stream(
|
||||
|
||||
// non-static for testing
|
||||
cdc::stream_id get_stream(
|
||||
const utils::chunked_vector<cdc::token_range_description>& entries,
|
||||
const std::vector<cdc::token_range_description>& entries,
|
||||
dht::token tok) {
|
||||
if (entries.empty()) {
|
||||
on_internal_error(cdc_log, "get_stream: entries empty");
|
||||
|
||||
@@ -21,27 +21,27 @@ public:
|
||||
: file_impl(*get_file_impl(f)), _error_handler(error_handler), _file(f) {
|
||||
}
|
||||
|
||||
virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, io_intent* intent) override {
|
||||
virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) override {
|
||||
return do_io_check(_error_handler, [&] {
|
||||
return get_file_impl(_file)->write_dma(pos, buffer, len, intent);
|
||||
return get_file_impl(_file)->write_dma(pos, buffer, len, pc);
|
||||
});
|
||||
}
|
||||
|
||||
virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, io_intent* intent) override {
|
||||
virtual future<size_t> write_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) override {
|
||||
return do_io_check(_error_handler, [&] {
|
||||
return get_file_impl(_file)->write_dma(pos, iov, intent);
|
||||
return get_file_impl(_file)->write_dma(pos, iov, pc);
|
||||
});
|
||||
}
|
||||
|
||||
virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, io_intent* intent) override {
|
||||
virtual future<size_t> read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) override {
|
||||
return do_io_check(_error_handler, [&] {
|
||||
return get_file_impl(_file)->read_dma(pos, buffer, len, intent);
|
||||
return get_file_impl(_file)->read_dma(pos, buffer, len, pc);
|
||||
});
|
||||
}
|
||||
|
||||
virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, io_intent* intent) override {
|
||||
virtual future<size_t> read_dma(uint64_t pos, std::vector<iovec> iov, const io_priority_class& pc) override {
|
||||
return do_io_check(_error_handler, [&] {
|
||||
return get_file_impl(_file)->read_dma(pos, iov, intent);
|
||||
return get_file_impl(_file)->read_dma(pos, iov, pc);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -99,9 +99,9 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, io_intent* intent) override {
|
||||
virtual future<temporary_buffer<uint8_t>> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) override {
|
||||
return do_io_check(_error_handler, [&] {
|
||||
return get_file_impl(_file)->dma_read_bulk(offset, range_size, intent);
|
||||
return get_file_impl(_file)->dma_read_bulk(offset, range_size, pc);
|
||||
});
|
||||
}
|
||||
private:
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
#
|
||||
# Copyright 2023-present ScyllaDB
|
||||
#
|
||||
|
||||
#
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
#
|
||||
find_path(rapidxml_INCLUDE_DIR
|
||||
NAMES rapidxml.h rapidxml/rapidxml.hpp)
|
||||
|
||||
mark_as_advanced(
|
||||
rapidxml_INCLUDE_DIR)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
find_package_handle_standard_args(rapidxml
|
||||
REQUIRED_VARS
|
||||
rapidxml_INCLUDE_DIR)
|
||||
|
||||
if(rapidxml_FOUND)
|
||||
if(NOT TARGET rapidxml::rapidxml)
|
||||
add_library(rapidxml::rapidxml INTERFACE IMPORTED)
|
||||
set_target_properties(rapidxml::rapidxml
|
||||
PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES ${rapidxml_INCLUDE_DIR})
|
||||
endif()
|
||||
endif()
|
||||
@@ -1,31 +1,20 @@
|
||||
###
|
||||
### Generate version file and supply appropriate compile definitions for release.cc
|
||||
###
|
||||
function(generate_scylla_version)
|
||||
function(add_version_library name source)
|
||||
set(version_file ${CMAKE_CURRENT_BINARY_DIR}/SCYLLA-VERSION-FILE)
|
||||
set(release_file ${CMAKE_CURRENT_BINARY_DIR}/SCYLLA-RELEASE-FILE)
|
||||
set(product_file ${CMAKE_CURRENT_BINARY_DIR}/SCYLLA-PRODUCT-FILE)
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_SOURCE_DIR}/SCYLLA-VERSION-GEN --output-dir "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
|
||||
|
||||
file(STRINGS ${version_file} scylla_version)
|
||||
file(STRINGS ${release_file} scylla_release)
|
||||
file(STRINGS ${product_file} scylla_product)
|
||||
|
||||
string(REPLACE "-" "~" scylla_version_tilde ${scylla_version})
|
||||
|
||||
set(Scylla_VERSION "${scylla_version_tilde}" CACHE INTERNAL "")
|
||||
set(Scylla_RELEASE "${scylla_release}" CACHE INTERNAL "")
|
||||
set(Scylla_PRODUCT "${scylla_product}" CACHE INTERNAL "")
|
||||
endfunction(generate_scylla_version)
|
||||
|
||||
function(add_version_library name source)
|
||||
add_library(${name} OBJECT ${source})
|
||||
target_compile_definitions(${name}
|
||||
PRIVATE
|
||||
SCYLLA_VERSION=\"${Scylla_VERSION}\"
|
||||
SCYLLA_RELEASE=\"${Scylla_RELEASE}\")
|
||||
SCYLLA_VERSION=\"${scylla_version}\"
|
||||
SCYLLA_RELEASE=\"${scylla_release}\")
|
||||
target_link_libraries(${name}
|
||||
PRIVATE
|
||||
Seastar::seastar)
|
||||
|
||||
@@ -5,6 +5,15 @@
|
||||
# actually compiling a sample program.
|
||||
function(add_whole_archive name library)
|
||||
add_library(${name} INTERFACE)
|
||||
target_link_libraries(${name} INTERFACE
|
||||
"$<LINK_LIBRARY:WHOLE_ARCHIVE,${library}>")
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.24)
|
||||
target_link_libraries(${name} INTERFACE
|
||||
"$<LINK_LIBRARY:WHOLE_ARCHIVE,${library}>")
|
||||
else()
|
||||
add_dependencies(${name} ${library})
|
||||
target_include_directories(${name} INTERFACE
|
||||
${CMAKE_SOURCE_DIR})
|
||||
target_link_options(auth INTERFACE
|
||||
"$<$<CXX_COMPILER_ID:Clang>:SHELL:LINKER:-force_load $<TARGET_LINKER_FILE:${library}>>"
|
||||
"$<$<CXX_COMPILER_ID:GNU>:SHELL:LINKER:--whole-archive $<TARGET_LINKER_FILE:${library}> LINKER:--no-whole-archive>")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
function(build_submodule name dir)
|
||||
cmake_parse_arguments(parsed_args "NOARCH" "" "" ${ARGN})
|
||||
set(version_release "${Scylla_VERSION}-${Scylla_RELEASE}")
|
||||
set(product_version_release
|
||||
"${Scylla_PRODUCT}-${Scylla_VERSION}-${Scylla_RELEASE}")
|
||||
set(working_dir ${CMAKE_CURRENT_SOURCE_DIR}/${dir})
|
||||
if(parsed_args_NOARCH)
|
||||
set(arch "noarch")
|
||||
else()
|
||||
set(arch "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
endif()
|
||||
set(reloc_args ${parsed_args_UNPARSED_ARGUMENTS})
|
||||
set(reloc_pkg "${working_dir}/build/${Scylla_PRODUCT}-${name}-${version_release}.${arch}.tar.gz")
|
||||
add_custom_command(
|
||||
OUTPUT ${reloc_pkg}
|
||||
COMMAND reloc/build_reloc.sh --version ${product_version_release} --nodeps ${reloc_args}
|
||||
WORKING_DIRECTORY "${working_dir}"
|
||||
JOB_POOL submodule_pool)
|
||||
add_custom_target(dist-${name}-tar
|
||||
DEPENDS ${reloc_pkg})
|
||||
add_custom_target(dist-${name}-rpm
|
||||
COMMAND reloc/build_rpm.sh --reloc-pkg ${reloc_pkg}
|
||||
DEPENDS ${reloc_pkg}
|
||||
WORKING_DIRECTORY "${working_dir}")
|
||||
add_custom_target(dist-${name}-deb
|
||||
COMMAND reloc/build_deb.sh --reloc-pkg ${reloc_pkg}
|
||||
DEPENDS ${reloc_pkg}
|
||||
WORKING_DIRECTORY "${working_dir}")
|
||||
add_custom_target(dist-${name}
|
||||
DEPENDS dist-${name}-tar dist-${name}-rpm dist-${name}-deb)
|
||||
endfunction()
|
||||
|
||||
macro(dist_submodule name dir pkgs)
|
||||
# defined as a macro, so that we can append the path to the dist tarball to
|
||||
# specified "pkgs"
|
||||
cmake_parse_arguments(parsed_args "NOARCH" "" "" ${ARGN})
|
||||
if(parsed_args_NOARCH)
|
||||
set(arch "noarch")
|
||||
else()
|
||||
set(arch "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
endif()
|
||||
set(pkg_name "${Scylla_PRODUCT}-${name}-${Scylla_VERSION}-${Scylla_RELEASE}.${arch}.tar.gz")
|
||||
set(reloc_pkg "${CMAKE_SOURCE_DIR}/tools/${dir}/build/${pkg_name}")
|
||||
set(dist_pkg "${CMAKE_CURRENT_BINARY_DIR}/${pkg_name}")
|
||||
add_custom_command(
|
||||
OUTPUT ${dist_pkg}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${reloc_pkg} ${dist_pkg}
|
||||
DEPENDS dist-${name}-tar)
|
||||
list(APPEND ${pkgs} "${dist_pkg}")
|
||||
endmacro()
|
||||
@@ -1,5 +1,7 @@
|
||||
find_program (ANTLR3 antlr3
|
||||
REQUIRED)
|
||||
find_program (ANTLR3 antlr3)
|
||||
# Abort configuration when the antlr3 program was not found.
# The mode keyword must be FATAL_ERROR: a bare "FATAL" is not a message() mode,
# so it would be printed as part of the text and configuration would continue.
if(NOT ANTLR3)
  message(FATAL_ERROR "antlr3 is required")
endif()
|
||||
|
||||
# Parse antlr3 grammar files and generate C++ sources
|
||||
function(generate_cql_grammar)
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
set(Seastar_OptimizationLevel_COVERAGE "g")
|
||||
set(CMAKE_CXX_FLAGS_COVERAGE
|
||||
""
|
||||
CACHE
|
||||
INTERNAL
|
||||
"")
|
||||
string(APPEND CMAKE_CXX_FLAGS_COVERAGE
|
||||
" -O${Seastar_OptimizationLevel_SANITIZE}")
|
||||
|
||||
set(Seastar_DEFINITIONS_COVERAGE
|
||||
SCYLLA_BUILD_MODE=debug
|
||||
DEBUG
|
||||
SANITIZE
|
||||
DEBUG_LSA_SANITIZER
|
||||
SCYLLA_ENABLE_ERROR_INJECTION)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_COVERAGE
|
||||
" -O${Seastar_OptimizationLevel_COVERAGE} -fprofile-instr-generate -fcoverage-mapping -g -gz")
|
||||
|
||||
set(CMAKE_STATIC_LINKER_FLAGS_COVERAGE
|
||||
"-fprofile-instr-generate -fcoverage-mapping")
|
||||
|
||||
set(stack_usage_threshold_in_KB 40)
|
||||
@@ -12,15 +12,16 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64")
|
||||
else()
|
||||
set(clang_inline_threshold 2500)
|
||||
endif()
|
||||
add_compile_options(
|
||||
"$<$<CXX_COMPILER_ID:GNU>:--param;inline-unit-growth=300>"
|
||||
"$<$<CXX_COMPILER_ID:Clang>:-mllvm;-inline-threshold=${clang_inline_threshold}>"
|
||||
string(APPEND CMAKE_CXX_FLAGS_RELEASE
|
||||
" $<$<CXX_COMPILER_ID:GNU>:--param inline-unit-growth=300"
|
||||
" $<$<CXX_COMPILER_ID:Clang>:-mllvm -inline-threshold=${clang_inline_threshold}>"
|
||||
# clang generates 16-byte loads that break store-to-load forwarding
|
||||
# gcc also has some trouble: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103554
|
||||
"-fno-slp-vectorize")
|
||||
set(Seastar_DEFINITIONS_RELEASE
|
||||
" -fno-slp-vectorize")
|
||||
set(Seastar_DEFINITIONS_DEBUG
|
||||
SCYLLA_BUILD_MODE=release)
|
||||
|
||||
add_link_options("LINKER:--gc-sections")
|
||||
set(CMAKE_STATIC_LINKER_FLAGS_RELEASE
|
||||
"-Wl,--gc-sections")
|
||||
|
||||
set(stack_usage_threshold_in_KB 13)
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
set(Seastar_OptimizationLevel_SANITIZE "s")
|
||||
set(CMAKE_CXX_FLAGS_SANITIZE
|
||||
""
|
||||
CACHE
|
||||
INTERNAL
|
||||
"")
|
||||
string(APPEND CMAKE_CXX_FLAGS_SANITIZE
|
||||
" -O${Seastar_OptimizationLevel_SANITIZE}")
|
||||
|
||||
set(Seastar_DEFINITIONS_SANITIZE
|
||||
SCYLLA_BUILD_MODE=sanitize
|
||||
DEBUG
|
||||
SANITIZE
|
||||
DEBUG_LSA_SANITIZER
|
||||
SCYLLA_ENABLE_ERROR_INJECTION)
|
||||
|
||||
set(stack_usage_threshold_in_KB 50)
|
||||
@@ -11,117 +11,31 @@ foreach(warning ${disabled_warnings})
|
||||
endif()
|
||||
endforeach()
|
||||
list(TRANSFORM _supported_warnings PREPEND "-Wno-")
|
||||
add_compile_options(
|
||||
string(JOIN " " CMAKE_CXX_FLAGS
|
||||
"-Wall"
|
||||
"-Werror"
|
||||
"-Wno-error=deprecated-declarations"
|
||||
"-Wimplicit-fallthrough"
|
||||
${_supported_warnings})
|
||||
|
||||
function(default_target_arch arch)
|
||||
set(x86_instruction_sets i386 i686 x86_64)
|
||||
if(CMAKE_SYSTEM_PROCESSOR IN_LIST x86_instruction_sets)
|
||||
set(${arch} "westmere" PARENT_SCOPE)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
|
||||
# we always use intrinsics like vmull.p64 for speeding up crc32 calculations
|
||||
# on the aarch64 architectures, and they require the crypto extension, so
|
||||
# we have to add "+crypto" in the architecture flags passed to -march. the
|
||||
# same applies to crc32 instructions, which need the ARMv8-A CRC32 extension
|
||||
# please note, Seastar also sets -march when compiled with DPDK enabled.
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR EQUAL "aarch64")
|
||||
set(${arch} "armv8-a+crc+crypto" PARENT_SCOPE)
|
||||
else()
|
||||
set(${arch} "" PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Right-align `${str}` in a field of `${length}` characters, filling the left
# side with `${fill}`, and store the result in the caller's `${output}`
# variable. With the syntax of fmtlib:
#   fmt::print("{:#>{}}", str, length)
# where `#` is the `${fill}` char. When `${str}` is already `${length}`
# characters or longer, it is stored unchanged.
function(pad_at_begin output fill str length)
  string(LENGTH "${str}" str_len)
  math(EXPR padding_len "${length} - ${str_len}")
  # Start from an empty padding: a function scope inherits the caller's
  # variables, so a `padding` set by the caller must not leak into the result
  # when no padding is needed.
  set(padding "")
  if(padding_len GREATER 0)
    string(REPEAT "${fill}" ${padding_len} padding)
  endif()
  set(${output} "${padding}${str}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
# The relocatable package includes its own dynamic linker. We don't
|
||||
# know the path it will be installed to, so for now use a very long
|
||||
# path so that patchelf doesn't need to edit the program headers. The
|
||||
# kernel imposes a limit of 4096 bytes including the null. The other
|
||||
# constraint is that the build-id has to be in the first page, so we
|
||||
# can't use all 4096 bytes for the dynamic linker.
|
||||
# In here we just guess that 2000 extra / should be enough to cover
|
||||
# any path we get installed to but not so large that the build-id is
|
||||
# pushed to the second page.
|
||||
# At the end of the build we check that the build-id is indeed in the
|
||||
# first page. At install time we check that patchelf doesn't modify
|
||||
# the program headers.
|
||||
function(get_padded_dynamic_linker_option output length)
|
||||
set(dynamic_linker_option "-dynamic-linker")
|
||||
# capture the driver-generated command line first
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_C_COMPILER} "-###" /dev/null -o t
|
||||
ERROR_VARIABLE driver_command_line
|
||||
ERROR_STRIP_TRAILING_WHITESPACE)
|
||||
# extract the argument for the "-dynamic-linker" option
|
||||
if(driver_command_line MATCHES ".*\"?${dynamic_linker_option}\"? \"?([^ \"]*)\"? .*")
|
||||
set(dynamic_linker ${CMAKE_MATCH_1})
|
||||
else()
|
||||
message(FATAL_ERROR "Unable to find ${dynamic_linker_option} in driver-generated command: "
|
||||
"${driver_command_line}")
|
||||
endif()
|
||||
# prefixing a path with "/"s does not actually change what it means
|
||||
pad_at_begin(padded_dynamic_linker "/" "${dynamic_linker}" ${length})
|
||||
set(${output} "${dynamic_linker_option}=${padded_dynamic_linker}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
add_compile_options("-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
|
||||
|
||||
default_target_arch(target_arch)
|
||||
if(target_arch)
|
||||
add_compile_options("-march=${target_arch}")
|
||||
string(APPEND CMAKE_CXX_FLAGS " -march=${target_arch}")
|
||||
endif()
|
||||
|
||||
math(EXPR _stack_usage_threshold_in_bytes "${stack_usage_threshold_in_KB} * 1024")
|
||||
set(_stack_usage_threshold_flag "-Wstack-usage=${_stack_usage_threshold_in_bytes}")
|
||||
check_cxx_compiler_flag(${_stack_usage_threshold_flag} _stack_usage_flag_supported)
|
||||
if(_stack_usage_flag_supported)
|
||||
add_compile_options("${_stack_usage_threshold_flag}")
|
||||
string(APPEND CMAKE_CXX_FLAGS " ${_stack_usage_threshold_flag}")
|
||||
endif()
|
||||
|
||||
# Force SHA1 build-id generation
|
||||
add_link_options("LINKER:--build-id=sha1")
|
||||
include(CheckLinkerFlag)
|
||||
set(Scylla_USE_LINKER
|
||||
""
|
||||
CACHE
|
||||
STRING
|
||||
"Use specified linker instead of the default one")
|
||||
if(Scylla_USE_LINKER)
|
||||
set(linkers "${Scylla_USE_LINKER}")
|
||||
else()
|
||||
set(linkers "lld" "gold")
|
||||
endif()
|
||||
|
||||
foreach(linker ${linkers})
|
||||
set(linker_flag "-fuse-ld=${linker}")
|
||||
check_linker_flag(CXX ${linker_flag} "CXX_LINKER_HAVE_${linker}")
|
||||
if(CXX_LINKER_HAVE_${linker})
|
||||
add_link_options("${linker_flag}")
|
||||
break()
|
||||
elseif(Scylla_USE_LINKER)
|
||||
message(FATAL_ERROR "${Scylla_USE_LINKER} is not supported.")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(DEFINED ENV{NIX_CC})
|
||||
get_padded_dynamic_linker_option(dynamic_linker_option 0)
|
||||
else()
|
||||
# gdb has a SO_NAME_MAX_PATH_SIZE of 512, so limit the path size to
|
||||
# that. The 512 includes the null at the end, hence the 511 below.
|
||||
get_padded_dynamic_linker_option(dynamic_linker_option 511)
|
||||
endif()
|
||||
add_link_options("${dynamic_linker_option}")
|
||||
|
||||
@@ -29,27 +29,32 @@
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "sstables/exceptions.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "sstables/sstable_writer.hh"
|
||||
#include "sstables/progress_monitor.hh"
|
||||
#include "sstables/sstables_manager.hh"
|
||||
#include "compaction.hh"
|
||||
#include "compaction_manager.hh"
|
||||
#include "schema/schema.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "service/priority_manager.hh"
|
||||
#include "db_clock.hh"
|
||||
#include "mutation/mutation_compactor.hh"
|
||||
#include "leveled_manifest.hh"
|
||||
#include "dht/token.hh"
|
||||
#include "dht/partition_filter.hh"
|
||||
#include "mutation_writer/shard_based_splitting_writer.hh"
|
||||
#include "mutation_writer/partition_based_splitting_writer.hh"
|
||||
#include "mutation/mutation_source_metadata.hh"
|
||||
#include "mutation/mutation_fragment_stream_validator.hh"
|
||||
#include "utils/UUID_gen.hh"
|
||||
#include "utils/utf8.hh"
|
||||
#include "utils/fmt-compat.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "readers/multi_range.hh"
|
||||
#include "readers/filtering.hh"
|
||||
#include "readers/compacting.hh"
|
||||
#include "tombstone_gc.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "keys.hh"
|
||||
|
||||
namespace sstables {
|
||||
|
||||
@@ -143,6 +148,25 @@ std::ostream& operator<<(std::ostream& os, compaction_type_options::scrub::quara
|
||||
return os << to_string(quarantine_mode);
|
||||
}
|
||||
|
||||
// Prints a data size using power-of-1000 units: the value is divided by 1000
// until it drops below 1000 or the largest known unit ("PB") is reached, then
// written followed by the matching suffix.
std::ostream& operator<<(std::ostream& os, pretty_printed_data_size data) {
    static constexpr const char* suffixes[] = { " bytes", "kB", "MB", "GB", "TB", "PB" };
    // Number of entries in the table. sizeof(suffixes) alone is the size in
    // bytes, which would let the index run past the last suffix.
    static constexpr unsigned suffix_count = sizeof(suffixes) / sizeof(suffixes[0]);

    unsigned exp = 0;
    // Stop at the last suffix so that suffixes[exp] is always a valid entry.
    while ((data._size >= 1000) && (exp + 1 < suffix_count)) {
        exp++;
        data._size /= 1000;
    }

    os << data._size << suffixes[exp];
    return os;
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, pretty_printed_throughput tp) {
|
||||
uint64_t throughput = tp._duration.count() > 0 ? tp._size / tp._duration.count() : 0;
|
||||
os << pretty_printed_data_size(throughput) << "/s";
|
||||
return os;
|
||||
}
|
||||
|
||||
static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_s, sstable_set::incremental_selector& selector,
|
||||
const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks) {
|
||||
if (!table_s.tombstone_gc_enabled()) [[unlikely]] {
|
||||
@@ -325,21 +349,16 @@ public:
|
||||
void consume_end_of_stream();
|
||||
};
|
||||
|
||||
using use_backlog_tracker = bool_class<class use_backlog_tracker_tag>;
|
||||
|
||||
struct compaction_read_monitor_generator final : public read_monitor_generator {
|
||||
class compaction_read_monitor final : public sstables::read_monitor, public backlog_read_progress_manager {
|
||||
sstables::shared_sstable _sst;
|
||||
table_state& _table_s;
|
||||
const sstables::reader_position_tracker* _tracker = nullptr;
|
||||
uint64_t _last_position_seen = 0;
|
||||
use_backlog_tracker _use_backlog_tracker;
|
||||
public:
|
||||
virtual void on_read_started(const sstables::reader_position_tracker& tracker) override {
|
||||
_tracker = &tracker;
|
||||
if (_use_backlog_tracker) {
|
||||
_table_s.get_backlog_tracker().register_compacting_sstable(_sst, *this);
|
||||
}
|
||||
_table_s.get_backlog_tracker().register_compacting_sstable(_sst, *this);
|
||||
}
|
||||
|
||||
virtual void on_read_completed() override {
|
||||
@@ -357,19 +376,19 @@ struct compaction_read_monitor_generator final : public read_monitor_generator {
|
||||
}
|
||||
|
||||
void remove_sstable() {
|
||||
if (_sst && _use_backlog_tracker) {
|
||||
if (_sst) {
|
||||
_table_s.get_backlog_tracker().revert_charges(_sst);
|
||||
}
|
||||
_sst = {};
|
||||
}
|
||||
|
||||
compaction_read_monitor(sstables::shared_sstable sst, table_state& table_s, use_backlog_tracker use_backlog_tracker)
|
||||
: _sst(std::move(sst)), _table_s(table_s), _use_backlog_tracker(use_backlog_tracker) { }
|
||||
compaction_read_monitor(sstables::shared_sstable sst, table_state& table_s)
|
||||
: _sst(std::move(sst)), _table_s(table_s) { }
|
||||
|
||||
~compaction_read_monitor() {
|
||||
// We failed to finish handling this SSTable, so we have to update the backlog_tracker
|
||||
// about it.
|
||||
if (_sst && _use_backlog_tracker) {
|
||||
if (_sst) {
|
||||
_table_s.get_backlog_tracker().revert_charges(_sst);
|
||||
}
|
||||
}
|
||||
@@ -378,16 +397,12 @@ struct compaction_read_monitor_generator final : public read_monitor_generator {
|
||||
};
|
||||
|
||||
virtual sstables::read_monitor& operator()(sstables::shared_sstable sst) override {
|
||||
auto p = _generated_monitors.emplace(sst->generation(), compaction_read_monitor(sst, _table_s, _use_backlog_tracker));
|
||||
auto p = _generated_monitors.emplace(sst->generation(), compaction_read_monitor(sst, _table_s));
|
||||
return p.first->second;
|
||||
}
|
||||
|
||||
explicit compaction_read_monitor_generator(table_state& table_s, use_backlog_tracker use_backlog_tracker = use_backlog_tracker::yes)
|
||||
: _table_s(table_s), _use_backlog_tracker(use_backlog_tracker) {}
|
||||
|
||||
uint64_t compacted() const {
|
||||
return boost::accumulate(_generated_monitors | boost::adaptors::map_values | boost::adaptors::transformed([](auto& monitor) { return monitor.compacted(); }), uint64_t(0));
|
||||
}
|
||||
explicit compaction_read_monitor_generator(table_state& table_s)
|
||||
: _table_s(table_s) {}
|
||||
|
||||
void remove_exhausted_sstables(const std::vector<sstables::shared_sstable>& exhausted_sstables) {
|
||||
for (auto& sst : exhausted_sstables) {
|
||||
@@ -400,29 +415,8 @@ struct compaction_read_monitor_generator final : public read_monitor_generator {
|
||||
private:
|
||||
table_state& _table_s;
|
||||
std::unordered_map<generation_type, compaction_read_monitor> _generated_monitors;
|
||||
use_backlog_tracker _use_backlog_tracker;
|
||||
|
||||
friend class compaction_progress_monitor;
|
||||
};
|
||||
|
||||
void compaction_progress_monitor::set_generator(std::unique_ptr<read_monitor_generator> generator) {
|
||||
_generator = std::move(generator);
|
||||
}
|
||||
|
||||
void compaction_progress_monitor::reset_generator() {
|
||||
if (_generator) {
|
||||
_progress = dynamic_cast<compaction_read_monitor_generator&>(*_generator).compacted();
|
||||
}
|
||||
_generator = nullptr;
|
||||
}
|
||||
|
||||
uint64_t compaction_progress_monitor::get_progress() const {
|
||||
if (_generator) {
|
||||
return dynamic_cast<compaction_read_monitor_generator&>(*_generator).compacted();
|
||||
}
|
||||
return _progress;
|
||||
}
|
||||
|
||||
class formatted_sstables_list {
|
||||
bool _include_origin = true;
|
||||
std::vector<std::string> _ssts;
|
||||
@@ -453,9 +447,9 @@ class compaction {
|
||||
protected:
|
||||
compaction_data& _cdata;
|
||||
table_state& _table_s;
|
||||
const compaction_sstable_creator_fn _sstable_creator;
|
||||
const schema_ptr _schema;
|
||||
const reader_permit _permit;
|
||||
compaction_sstable_creator_fn _sstable_creator;
|
||||
schema_ptr _schema;
|
||||
reader_permit _permit;
|
||||
std::vector<shared_sstable> _sstables;
|
||||
std::vector<generation_type> _input_sstable_generations;
|
||||
// Unused sstables are tracked because if compaction is interrupted we can only delete them.
|
||||
@@ -464,47 +458,41 @@ protected:
|
||||
std::vector<shared_sstable> _new_unused_sstables;
|
||||
std::vector<shared_sstable> _all_new_sstables;
|
||||
lw_shared_ptr<sstable_set> _compacting;
|
||||
const sstables::compaction_type _type;
|
||||
const uint64_t _max_sstable_size;
|
||||
const uint32_t _sstable_level;
|
||||
sstables::compaction_type _type;
|
||||
uint64_t _max_sstable_size;
|
||||
uint32_t _sstable_level;
|
||||
uint64_t _start_size = 0;
|
||||
uint64_t _end_size = 0;
|
||||
// fully expired files, which are skipped, aren't taken into account.
|
||||
uint64_t _compacting_data_file_size = 0;
|
||||
uint64_t _estimated_partitions = 0;
|
||||
uint64_t _bloom_filter_checks = 0;
|
||||
db::replay_position _rp;
|
||||
encoding_stats_collector _stats_collector;
|
||||
const bool _can_split_large_partition = false;
|
||||
bool _can_split_large_partition = false;
|
||||
bool _contains_multi_fragment_runs = false;
|
||||
mutation_source_metadata _ms_metadata = {};
|
||||
const compaction_sstable_replacer_fn _replacer;
|
||||
const run_id _run_identifier;
|
||||
compaction_sstable_replacer_fn _replacer;
|
||||
run_id _run_identifier;
|
||||
::io_priority_class _io_priority;
|
||||
// optional clone of sstable set to be used for expiration purposes, so it will be set if expiration is enabled.
|
||||
std::optional<sstable_set> _sstable_set;
|
||||
// used to incrementally calculate max purgeable timestamp, as we iterate through decorated keys.
|
||||
std::optional<sstable_set::incremental_selector> _selector;
|
||||
std::unordered_set<shared_sstable> _compacting_for_max_purgeable_func;
|
||||
// optional owned_ranges vector for cleanup;
|
||||
const owned_ranges_ptr _owned_ranges = {};
|
||||
// required for reshard compaction.
|
||||
const dht::sharder* _sharder = nullptr;
|
||||
const std::optional<dht::incremental_owned_ranges_checker> _owned_ranges_checker;
|
||||
owned_ranges_ptr _owned_ranges = {};
|
||||
std::optional<dht::incremental_owned_ranges_checker> _owned_ranges_checker;
|
||||
// Garbage collected sstables that are sealed but were not added to SSTable set yet.
|
||||
std::vector<shared_sstable> _unused_garbage_collected_sstables;
|
||||
// Garbage collected sstables that were added to SSTable set and should be eventually removed from it.
|
||||
std::vector<shared_sstable> _used_garbage_collected_sstables;
|
||||
utils::observable<> _stop_request_observable;
|
||||
private:
|
||||
// Keeps track of monitors for input sstable.
|
||||
// If _update_backlog_tracker is set to true, monitors are responsible for adjusting backlog as compaction progresses.
|
||||
compaction_progress_monitor& _progress_monitor;
|
||||
compaction_data& init_compaction_data(compaction_data& cdata, const compaction_descriptor& descriptor) const {
|
||||
cdata.compaction_fan_in = descriptor.fan_in();
|
||||
return cdata;
|
||||
}
|
||||
protected:
|
||||
compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor, use_backlog_tracker use_backlog_tracker)
|
||||
compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
|
||||
: _cdata(init_compaction_data(cdata, descriptor))
|
||||
, _table_s(table_s)
|
||||
, _sstable_creator(std::move(descriptor.creator))
|
||||
@@ -517,13 +505,12 @@ protected:
|
||||
, _can_split_large_partition(descriptor.can_split_large_partition)
|
||||
, _replacer(std::move(descriptor.replacer))
|
||||
, _run_identifier(descriptor.run_identifier)
|
||||
, _io_priority(descriptor.io_priority)
|
||||
, _sstable_set(std::move(descriptor.all_sstables_snapshot))
|
||||
, _selector(_sstable_set ? _sstable_set->make_incremental_selector() : std::optional<sstable_set::incremental_selector>{})
|
||||
, _compacting_for_max_purgeable_func(std::unordered_set<shared_sstable>(_sstables.begin(), _sstables.end()))
|
||||
, _owned_ranges(std::move(descriptor.owned_ranges))
|
||||
, _sharder(descriptor.sharder)
|
||||
, _owned_ranges_checker(_owned_ranges ? std::optional<dht::incremental_owned_ranges_checker>(*_owned_ranges) : std::nullopt)
|
||||
, _progress_monitor(progress_monitor)
|
||||
{
|
||||
for (auto& sst : _sstables) {
|
||||
_stats_collector.update(sst->get_encoding_stats_for_compaction());
|
||||
@@ -532,20 +519,12 @@ protected:
|
||||
_contains_multi_fragment_runs = std::any_of(_sstables.begin(), _sstables.end(), [&ssts_run_ids] (shared_sstable& sst) {
|
||||
return !ssts_run_ids.insert(sst->run_identifier()).second;
|
||||
});
|
||||
_progress_monitor.set_generator(std::make_unique<compaction_read_monitor_generator>(_table_s, use_backlog_tracker));
|
||||
}
|
||||
|
||||
read_monitor_generator& unwrap_monitor_generator() const {
|
||||
if (_progress_monitor._generator) {
|
||||
return *_progress_monitor._generator;
|
||||
}
|
||||
return default_read_monitor_generator();
|
||||
}
|
||||
|
||||
virtual uint64_t partitions_per_sstable() const {
|
||||
// some tests use _max_sstable_size == 0 to force one partition per sstable
|
||||
auto max_sstable_size = std::max<uint64_t>(_max_sstable_size, 1);
|
||||
uint64_t estimated_sstables = std::max(1UL, uint64_t(ceil(double(_compacting_data_file_size) / max_sstable_size)));
|
||||
uint64_t estimated_sstables = std::max(1UL, uint64_t(ceil(double(_start_size) / max_sstable_size)));
|
||||
return std::min(uint64_t(ceil(double(_estimated_partitions) / estimated_sstables)),
|
||||
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimated_partitions));
|
||||
}
|
||||
@@ -602,14 +581,15 @@ protected:
|
||||
return bool(_sstable_set) && _table_s.tombstone_gc_enabled();
|
||||
}
|
||||
|
||||
compaction_writer create_gc_compaction_writer(run_id gc_run) const {
|
||||
compaction_writer create_gc_compaction_writer() const {
|
||||
auto sst = _sstable_creator(this_shard_id());
|
||||
|
||||
auto&& priority = _io_priority;
|
||||
auto monitor = std::make_unique<compaction_write_monitor>(sst, _table_s, maximum_timestamp(), _sstable_level);
|
||||
sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
|
||||
cfg.run_identifier = gc_run;
|
||||
cfg.run_identifier = _run_identifier;
|
||||
cfg.monitor = monitor.get();
|
||||
auto writer = sst->get_writer(*schema(), partitions_per_sstable(), cfg, get_encoding_stats());
|
||||
auto writer = sst->get_writer(*schema(), partitions_per_sstable(), cfg, get_encoding_stats(), priority);
|
||||
return compaction_writer(std::move(monitor), std::move(writer), std::move(sst));
|
||||
}
|
||||
|
||||
@@ -628,14 +608,8 @@ protected:
|
||||
// When compaction finishes, all the temporary sstables generated here will be deleted and removed
|
||||
// from table's sstable set.
|
||||
compacted_fragments_writer get_gc_compacted_fragments_writer() {
|
||||
// because the temporary sstable run can overlap with the non-gc sstables run created by
|
||||
// get_compacted_fragments_writer(), we have to use a different run_id. the gc_run_id is
|
||||
// created here as:
|
||||
// 1. it can be shared across all sstables created by this writer
|
||||
// 2. it is optional, as gc writer is not always used
|
||||
auto gc_run = run_id::create_random_id();
|
||||
return compacted_fragments_writer(*this,
|
||||
[this, gc_run] (const dht::decorated_key&) { return create_gc_compaction_writer(gc_run); },
|
||||
[this] (const dht::decorated_key&) { return create_gc_compaction_writer(); },
|
||||
[this] (compaction_writer* cw) { stop_gc_compaction_writer(cw); },
|
||||
_stop_request_observable);
|
||||
}
|
||||
@@ -652,8 +626,18 @@ protected:
|
||||
return _used_garbage_collected_sstables;
|
||||
}
|
||||
|
||||
virtual bool enable_garbage_collected_sstable_writer() const noexcept {
|
||||
return _contains_multi_fragment_runs && _max_sstable_size != std::numeric_limits<uint64_t>::max() && bool(_replacer);
|
||||
bool enable_garbage_collected_sstable_writer() const noexcept {
|
||||
return _contains_multi_fragment_runs && _max_sstable_size != std::numeric_limits<uint64_t>::max();
|
||||
}
|
||||
|
||||
flat_mutation_reader_v2::filter make_partition_filter() const {
|
||||
return [this] (const dht::decorated_key& dk) {
|
||||
if (!_owned_ranges_checker->belongs_to_current_node(dk.token())) {
|
||||
log_trace("Token {} does not belong to this node, skipping", dk.token());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
}
|
||||
public:
|
||||
compaction& operator=(const compaction&) = delete;
|
||||
@@ -663,59 +647,20 @@ public:
|
||||
compaction& operator=(compaction&& other) = delete;
|
||||
|
||||
virtual ~compaction() {
|
||||
_progress_monitor.reset_generator();
|
||||
}
|
||||
private:
|
||||
// Default range sstable reader that will only return mutation that belongs to current shard.
|
||||
virtual flat_mutation_reader_v2 make_sstable_reader(schema_ptr s,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding) const = 0;
|
||||
virtual flat_mutation_reader_v2 make_sstable_reader() const = 0;
|
||||
|
||||
// Make a filtering reader if needed
|
||||
// FIXME: the sstable reader itself should be passed the owned ranges
|
||||
// so it can skip over the disowned ranges efficiently using the index.
|
||||
// Ref https://github.com/scylladb/scylladb/issues/12998
|
||||
flat_mutation_reader_v2 setup_sstable_reader() const {
|
||||
if (!_owned_ranges_checker) {
|
||||
return make_sstable_reader(_schema,
|
||||
_permit,
|
||||
query::full_partition_range,
|
||||
_schema->full_slice(),
|
||||
tracing::trace_state_ptr(),
|
||||
::streamed_mutation::forwarding::no,
|
||||
::mutation_reader::forwarding::no);
|
||||
return make_sstable_reader();
|
||||
}
|
||||
|
||||
auto source = mutation_source([this] (schema_ptr s,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
log_trace("Creating sstable set reader with range {}", range);
|
||||
return make_sstable_reader(std::move(s),
|
||||
std::move(permit),
|
||||
range,
|
||||
slice,
|
||||
std::move(trace_state),
|
||||
fwd,
|
||||
fwd_mr);
|
||||
});
|
||||
|
||||
auto owned_range_generator = [this] () -> std::optional<dht::partition_range> {
|
||||
auto r = _owned_ranges_checker->next_owned_range();
|
||||
if (r == nullptr) {
|
||||
return std::nullopt;
|
||||
}
|
||||
log_trace("Skipping to the next owned range {}", *r);
|
||||
return dht::to_partition_range(*r);
|
||||
};
|
||||
|
||||
return make_flat_multi_range_reader(_schema, _permit, std::move(source),
|
||||
std::move(owned_range_generator),
|
||||
_schema->full_slice(),
|
||||
tracing::trace_state_ptr());
|
||||
return make_filtering_reader(make_sstable_reader(), make_partition_filter());
|
||||
}
|
||||
|
||||
virtual sstables::sstable_set make_sstable_set_for_input() const {
|
||||
@@ -749,14 +694,12 @@ private:
|
||||
continue;
|
||||
}
|
||||
|
||||
_cdata.compaction_size += sst->data_size();
|
||||
// We also capture the sstable, so we keep it alive while the read isn't done
|
||||
ssts->insert(sst);
|
||||
// FIXME: If the sstables have cardinality estimation bitmaps, use that
|
||||
// for a better estimate for the number of partitions in the merged
|
||||
// sstable than just adding up the lengths of individual sstables.
|
||||
_estimated_partitions += sst->get_estimated_key_count();
|
||||
_compacting_data_file_size += sst->ondisk_data_size();
|
||||
// TODO:
|
||||
// Note that this is not fully correct. Since we might be merging sstables that originated on
|
||||
// another shard (#cpu changed), we might be comparing RP:s with differing shard ids,
|
||||
@@ -785,7 +728,7 @@ private:
|
||||
auto consumer = make_interposer_consumer([this] (flat_mutation_reader_v2 reader) mutable {
|
||||
return seastar::async([this, reader = std::move(reader)] () mutable {
|
||||
auto close_reader = deferred_close(reader);
|
||||
auto cfc = get_compacted_fragments_writer();
|
||||
auto cfc = compacted_fragments_writer(get_compacted_fragments_writer());
|
||||
reader.consume_in_thread(std::move(cfc));
|
||||
});
|
||||
});
|
||||
@@ -863,8 +806,8 @@ protected:
|
||||
// By the time being, using estimated key count.
|
||||
log_info("{} {} sstables to {}. {} to {} (~{}% of original) in {}ms = {}. ~{} total partitions merged to {}.",
|
||||
report_finish_desc(),
|
||||
_input_sstable_generations.size(), new_sstables_msg, utils::pretty_printed_data_size(_start_size), utils::pretty_printed_data_size(_end_size), int(ratio * 100),
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(), utils::pretty_printed_throughput(_start_size, duration),
|
||||
_input_sstable_generations.size(), new_sstables_msg, pretty_printed_data_size(_start_size), pretty_printed_data_size(_end_size), int(ratio * 100),
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(), pretty_printed_throughput(_end_size, duration),
|
||||
_cdata.total_partitions, _cdata.total_keys_written);
|
||||
|
||||
return ret;
|
||||
@@ -1001,7 +944,7 @@ void compacted_fragments_writer::split_large_partition() {
|
||||
_c.log_debug("Closing active tombstone {} with {} for partition {}", _current_partition.current_emitted_tombstone, rtc, *_current_partition.dk);
|
||||
_compaction_writer->writer.consume(std::move(rtc));
|
||||
}
|
||||
_c.log_debug("Splitting large partition {} in order to respect SSTable size limit of {}", *_current_partition.dk, utils::pretty_printed_data_size(_c._max_sstable_size));
|
||||
_c.log_debug("Splitting large partition {} in order to respect SSTable size limit of {}", *_current_partition.dk, pretty_printed_data_size(_c._max_sstable_size));
|
||||
// Close partition in current writer, and open it again in a new writer.
|
||||
do_consume_end_of_partition();
|
||||
stop_current_writer();
|
||||
@@ -1085,29 +1028,72 @@ void compacted_fragments_writer::consume_end_of_stream() {
|
||||
}
|
||||
}
|
||||
|
||||
class reshape_compaction : public compaction {
|
||||
public:
|
||||
reshape_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
|
||||
: compaction(table_s, std::move(descriptor), cdata) {
|
||||
}
|
||||
|
||||
virtual sstables::sstable_set make_sstable_set_for_input() const override {
|
||||
return sstables::make_partitioned_sstable_set(_schema, false);
|
||||
}
|
||||
|
||||
flat_mutation_reader_v2 make_sstable_reader() const override {
|
||||
return _compacting->make_local_shard_sstable_reader(_schema,
|
||||
_permit,
|
||||
query::full_partition_range,
|
||||
_schema->full_slice(),
|
||||
_io_priority,
|
||||
tracing::trace_state_ptr(),
|
||||
::streamed_mutation::forwarding::no,
|
||||
::mutation_reader::forwarding::no,
|
||||
default_read_monitor_generator());
|
||||
}
|
||||
|
||||
std::string_view report_start_desc() const override {
|
||||
return "Reshaping";
|
||||
}
|
||||
|
||||
std::string_view report_finish_desc() const override {
|
||||
return "Reshaped";
|
||||
}
|
||||
|
||||
virtual compaction_writer create_compaction_writer(const dht::decorated_key& dk) override {
|
||||
auto sst = _sstable_creator(this_shard_id());
|
||||
setup_new_sstable(sst);
|
||||
|
||||
sstable_writer_config cfg = make_sstable_writer_config(compaction_type::Reshape);
|
||||
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority), sst};
|
||||
}
|
||||
|
||||
virtual void stop_sstable_writer(compaction_writer* writer) override {
|
||||
if (writer) {
|
||||
finish_new_sstable(writer);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class regular_compaction : public compaction {
|
||||
// keeps track of monitors for input sstable, which are responsible for adjusting backlog as compaction progresses.
|
||||
mutable compaction_read_monitor_generator _monitor_generator;
|
||||
seastar::semaphore _replacer_lock = {1};
|
||||
public:
|
||||
regular_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor, use_backlog_tracker use_backlog_tracker = use_backlog_tracker::yes)
|
||||
: compaction(table_s, std::move(descriptor), cdata, progress_monitor, use_backlog_tracker)
|
||||
regular_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
|
||||
: compaction(table_s, std::move(descriptor), cdata)
|
||||
, _monitor_generator(_table_s)
|
||||
{
|
||||
}
|
||||
|
||||
flat_mutation_reader_v2 make_sstable_reader(schema_ptr s,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr trace,
|
||||
streamed_mutation::forwarding sm_fwd,
|
||||
mutation_reader::forwarding mr_fwd) const override {
|
||||
return _compacting->make_local_shard_sstable_reader(std::move(s),
|
||||
std::move(permit),
|
||||
range,
|
||||
slice,
|
||||
std::move(trace),
|
||||
sm_fwd,
|
||||
mr_fwd,
|
||||
unwrap_monitor_generator());
|
||||
flat_mutation_reader_v2 make_sstable_reader() const override {
|
||||
return _compacting->make_local_shard_sstable_reader(_schema,
|
||||
_permit,
|
||||
query::full_partition_range,
|
||||
_schema->full_slice(),
|
||||
_io_priority,
|
||||
tracing::trace_state_ptr(),
|
||||
::streamed_mutation::forwarding::no,
|
||||
::mutation_reader::forwarding::no,
|
||||
_monitor_generator);
|
||||
}
|
||||
|
||||
std::string_view report_start_desc() const override {
|
||||
@@ -1125,7 +1111,7 @@ public:
|
||||
auto monitor = std::make_unique<compaction_write_monitor>(sst, _table_s, maximum_timestamp(), _sstable_level);
|
||||
sstable_writer_config cfg = make_sstable_writer_config(_type);
|
||||
cfg.monitor = monitor.get();
|
||||
return compaction_writer{std::move(monitor), sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats()), sst};
|
||||
return compaction_writer{std::move(monitor), sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority), sst};
|
||||
}
|
||||
|
||||
virtual void stop_sstable_writer(compaction_writer* writer) override {
|
||||
@@ -1178,7 +1164,7 @@ private:
|
||||
log_debug("Replacing earlier exhausted sstable(s) {} by new sstable(s) {}", formatted_sstables_list(exhausted_ssts, false), formatted_sstables_list(_new_unused_sstables, true));
|
||||
_replacer(get_compaction_completion_desc(exhausted_ssts, std::move(_new_unused_sstables)));
|
||||
_sstables.erase(exhausted, _sstables.end());
|
||||
dynamic_cast<compaction_read_monitor_generator&>(unwrap_monitor_generator()).remove_exhausted_sstables(exhausted_ssts);
|
||||
_monitor_generator.remove_exhausted_sstables(exhausted_ssts);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1196,13 +1182,12 @@ private:
|
||||
}
|
||||
|
||||
void update_pending_ranges() {
|
||||
auto pending_replacements = std::exchange(_cdata.pending_replacements, {});
|
||||
if (!_sstable_set || _sstable_set->all()->empty() || pending_replacements.empty()) { // set can be empty for testing scenario.
|
||||
if (!_sstable_set || _sstable_set->all()->empty() || _cdata.pending_replacements.empty()) { // set can be empty for testing scenario.
|
||||
return;
|
||||
}
|
||||
// Releases reference to sstables compacted by this compaction or another, both of which belongs
|
||||
// to the same column family
|
||||
for (auto& pending_replacement : pending_replacements) {
|
||||
for (auto& pending_replacement : _cdata.pending_replacements) {
|
||||
for (auto& sst : pending_replacement.removed) {
|
||||
// Set may not contain sstable to be removed because this compaction may have started
|
||||
// before the creation of that sstable.
|
||||
@@ -1216,75 +1201,7 @@ private:
|
||||
}
|
||||
}
|
||||
_selector.emplace(_sstable_set->make_incremental_selector());
|
||||
}
|
||||
};
|
||||
|
||||
class reshape_compaction : public regular_compaction {
|
||||
private:
|
||||
bool has_sstable_replacer() const noexcept {
|
||||
return bool(_replacer);
|
||||
}
|
||||
public:
|
||||
reshape_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor)
|
||||
: regular_compaction(table_s, std::move(descriptor), cdata, progress_monitor, use_backlog_tracker::no) {
|
||||
}
|
||||
|
||||
virtual sstables::sstable_set make_sstable_set_for_input() const override {
|
||||
return sstables::make_partitioned_sstable_set(_schema, false);
|
||||
}
|
||||
|
||||
// Unconditionally enable incremental compaction if the strategy specifies a max output size, e.g. LCS.
|
||||
virtual bool enable_garbage_collected_sstable_writer() const noexcept override {
|
||||
return _max_sstable_size != std::numeric_limits<uint64_t>::max() && bool(_replacer);
|
||||
}
|
||||
|
||||
flat_mutation_reader_v2 make_sstable_reader(schema_ptr s,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr trace,
|
||||
streamed_mutation::forwarding sm_fwd,
|
||||
mutation_reader::forwarding mr_fwd) const override {
|
||||
return _compacting->make_local_shard_sstable_reader(std::move(s),
|
||||
std::move(permit),
|
||||
range,
|
||||
slice,
|
||||
std::move(trace),
|
||||
sm_fwd,
|
||||
mr_fwd,
|
||||
unwrap_monitor_generator());
|
||||
}
|
||||
|
||||
std::string_view report_start_desc() const override {
|
||||
return "Reshaping";
|
||||
}
|
||||
|
||||
std::string_view report_finish_desc() const override {
|
||||
return "Reshaped";
|
||||
}
|
||||
|
||||
virtual compaction_writer create_compaction_writer(const dht::decorated_key& dk) override {
|
||||
auto sst = _sstable_creator(this_shard_id());
|
||||
setup_new_sstable(sst);
|
||||
|
||||
sstable_writer_config cfg = make_sstable_writer_config(compaction_type::Reshape);
|
||||
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats()), sst};
|
||||
}
|
||||
|
||||
virtual void stop_sstable_writer(compaction_writer* writer) override {
|
||||
if (writer) {
|
||||
if (has_sstable_replacer()) {
|
||||
regular_compaction::stop_sstable_writer(writer);
|
||||
} else {
|
||||
finish_new_sstable(writer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual void on_end_of_compaction() override {
|
||||
if (has_sstable_replacer()) {
|
||||
regular_compaction::on_end_of_compaction();
|
||||
}
|
||||
_cdata.pending_replacements.clear();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1312,8 +1229,8 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
cleanup_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor)
|
||||
: regular_compaction(table_s, std::move(descriptor), cdata, progress_monitor)
|
||||
cleanup_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
|
||||
: regular_compaction(table_s, std::move(descriptor), cdata)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -1542,8 +1459,8 @@ private:
|
||||
mutable uint64_t _validation_errors = 0;
|
||||
|
||||
public:
|
||||
scrub_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_type_options::scrub options, compaction_progress_monitor& progress_monitor)
|
||||
: regular_compaction(table_s, std::move(descriptor), cdata, progress_monitor, use_backlog_tracker::no)
|
||||
scrub_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_type_options::scrub options)
|
||||
: regular_compaction(table_s, std::move(descriptor), cdata)
|
||||
, _options(options)
|
||||
, _scrub_start_description(fmt::format("Scrubbing in {} mode", _options.operation_mode))
|
||||
, _scrub_finish_description(fmt::format("Finished scrubbing in {} mode", _options.operation_mode)) {
|
||||
@@ -1560,17 +1477,8 @@ public:
|
||||
return _scrub_finish_description;
|
||||
}
|
||||
|
||||
flat_mutation_reader_v2 make_sstable_reader(schema_ptr s,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr trace,
|
||||
streamed_mutation::forwarding sm_fwd,
|
||||
mutation_reader::forwarding mr_fwd) const override {
|
||||
if (!range.is_full()) {
|
||||
on_internal_error(clogger, fmt::format("Scrub compaction in mode {} expected full partition range, but got {} instead", _options.operation_mode, range));
|
||||
}
|
||||
auto crawling_reader = _compacting->make_crawling_reader(std::move(s), std::move(permit), nullptr, unwrap_monitor_generator());
|
||||
flat_mutation_reader_v2 make_sstable_reader() const override {
|
||||
auto crawling_reader = _compacting->make_crawling_reader(_schema, _permit, _io_priority, nullptr);
|
||||
return make_flat_mutation_reader_v2<reader>(std::move(crawling_reader), _options.operation_mode, _validation_errors);
|
||||
}
|
||||
|
||||
@@ -1589,7 +1497,7 @@ public:
|
||||
return end_consumer;
|
||||
}
|
||||
return [this, end_consumer = std::move(end_consumer)] (flat_mutation_reader_v2 reader) mutable -> future<> {
|
||||
auto cfg = mutation_writer::segregate_config{memory::stats().total_memory() / 10};
|
||||
auto cfg = mutation_writer::segregate_config{_io_priority, memory::stats().total_memory() / 10};
|
||||
return mutation_writer::segregate_by_partition(std::move(reader), cfg,
|
||||
[consumer = std::move(end_consumer), this] (flat_mutation_reader_v2 rd) {
|
||||
++_bucket_count;
|
||||
@@ -1639,8 +1547,8 @@ private:
|
||||
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimation_per_shard[s].estimated_partitions));
|
||||
}
|
||||
public:
|
||||
resharding_compaction(table_state& table_s, sstables::compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor)
|
||||
: compaction(table_s, std::move(descriptor), cdata, progress_monitor, use_backlog_tracker::no)
|
||||
resharding_compaction(table_state& table_s, sstables::compaction_descriptor descriptor, compaction_data& cdata)
|
||||
: compaction(table_s, std::move(descriptor), cdata)
|
||||
, _estimation_per_shard(smp::count)
|
||||
, _run_identifiers(smp::count)
|
||||
{
|
||||
@@ -1661,21 +1569,15 @@ public:
|
||||
~resharding_compaction() { }
|
||||
|
||||
// Use reader that makes sure no non-local mutation will not be filtered out.
|
||||
flat_mutation_reader_v2 make_sstable_reader(schema_ptr s,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
tracing::trace_state_ptr trace,
|
||||
streamed_mutation::forwarding sm_fwd,
|
||||
mutation_reader::forwarding mr_fwd) const override {
|
||||
return _compacting->make_range_sstable_reader(std::move(s),
|
||||
std::move(permit),
|
||||
range,
|
||||
slice,
|
||||
flat_mutation_reader_v2 make_sstable_reader() const override {
|
||||
return _compacting->make_range_sstable_reader(_schema,
|
||||
_permit,
|
||||
query::full_partition_range,
|
||||
_schema->full_slice(),
|
||||
_io_priority,
|
||||
nullptr,
|
||||
sm_fwd,
|
||||
mr_fwd,
|
||||
unwrap_monitor_generator());
|
||||
::streamed_mutation::forwarding::no,
|
||||
::mutation_reader::forwarding::no);
|
||||
|
||||
}
|
||||
|
||||
@@ -1698,14 +1600,14 @@ public:
|
||||
}
|
||||
|
||||
compaction_writer create_compaction_writer(const dht::decorated_key& dk) override {
|
||||
auto shard = _sharder->shard_of(dk.token());
|
||||
auto shard = dht::shard_of(*_schema, dk.token());
|
||||
auto sst = _sstable_creator(shard);
|
||||
setup_new_sstable(sst);
|
||||
|
||||
auto cfg = make_sstable_writer_config(compaction_type::Reshard);
|
||||
// sstables generated for a given shard will share the same run identifier.
|
||||
cfg.run_identifier = _run_identifiers.at(shard);
|
||||
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(shard), cfg, get_encoding_stats(), shard), sst};
|
||||
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(shard), cfg, get_encoding_stats(), _io_priority, shard), sst};
|
||||
}
|
||||
|
||||
void stop_sstable_writer(compaction_writer* writer) override {
|
||||
@@ -1747,49 +1649,47 @@ compaction_type compaction_type_options::type() const {
|
||||
return index_to_type[_options.index()];
|
||||
}
|
||||
|
||||
static std::unique_ptr<compaction> make_compaction(table_state& table_s, sstables::compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor) {
|
||||
static std::unique_ptr<compaction> make_compaction(table_state& table_s, sstables::compaction_descriptor descriptor, compaction_data& cdata) {
|
||||
struct {
|
||||
table_state& table_s;
|
||||
sstables::compaction_descriptor&& descriptor;
|
||||
compaction_data& cdata;
|
||||
compaction_progress_monitor& progress_monitor;
|
||||
|
||||
std::unique_ptr<compaction> operator()(compaction_type_options::reshape) {
|
||||
return std::make_unique<reshape_compaction>(table_s, std::move(descriptor), cdata, progress_monitor);
|
||||
return std::make_unique<reshape_compaction>(table_s, std::move(descriptor), cdata);
|
||||
}
|
||||
std::unique_ptr<compaction> operator()(compaction_type_options::reshard) {
|
||||
return std::make_unique<resharding_compaction>(table_s, std::move(descriptor), cdata, progress_monitor);
|
||||
return std::make_unique<resharding_compaction>(table_s, std::move(descriptor), cdata);
|
||||
}
|
||||
std::unique_ptr<compaction> operator()(compaction_type_options::regular) {
|
||||
return std::make_unique<regular_compaction>(table_s, std::move(descriptor), cdata, progress_monitor);
|
||||
return std::make_unique<regular_compaction>(table_s, std::move(descriptor), cdata);
|
||||
}
|
||||
std::unique_ptr<compaction> operator()(compaction_type_options::cleanup) {
|
||||
return std::make_unique<cleanup_compaction>(table_s, std::move(descriptor), cdata, progress_monitor);
|
||||
return std::make_unique<cleanup_compaction>(table_s, std::move(descriptor), cdata);
|
||||
}
|
||||
std::unique_ptr<compaction> operator()(compaction_type_options::upgrade) {
|
||||
return std::make_unique<cleanup_compaction>(table_s, std::move(descriptor), cdata, progress_monitor);
|
||||
return std::make_unique<cleanup_compaction>(table_s, std::move(descriptor), cdata);
|
||||
}
|
||||
std::unique_ptr<compaction> operator()(compaction_type_options::scrub scrub_options) {
|
||||
return std::make_unique<scrub_compaction>(table_s, std::move(descriptor), cdata, scrub_options, progress_monitor);
|
||||
return std::make_unique<scrub_compaction>(table_s, std::move(descriptor), cdata, scrub_options);
|
||||
}
|
||||
} visitor_factory{table_s, std::move(descriptor), cdata, progress_monitor};
|
||||
} visitor_factory{table_s, std::move(descriptor), cdata};
|
||||
|
||||
return descriptor.options.visit(visitor_factory);
|
||||
}
|
||||
|
||||
static future<compaction_result> scrub_sstables_validate_mode(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s, read_monitor_generator& monitor_generator) {
|
||||
static future<compaction_result> scrub_sstables_validate_mode(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s) {
|
||||
auto schema = table_s.schema();
|
||||
auto permit = table_s.make_compaction_reader_permit();
|
||||
|
||||
uint64_t validation_errors = 0;
|
||||
cdata.compaction_size = boost::accumulate(descriptor.sstables | boost::adaptors::transformed([] (auto& sst) { return sst->data_size(); }), int64_t(0));
|
||||
|
||||
for (const auto& sst : descriptor.sstables) {
|
||||
clogger.info("Scrubbing in validate mode {}", sst->get_filename());
|
||||
|
||||
validation_errors += co_await sst->validate(permit, cdata.abort, [&schema] (sstring what) {
|
||||
validation_errors += co_await sst->validate(permit, descriptor.io_priority, cdata.abort, [&schema] (sstring what) {
|
||||
scrub_compaction::report_validation_error(compaction_type::Scrub, *schema, what);
|
||||
}, monitor_generator(sst));
|
||||
});
|
||||
// Did validation actually finish because aborted?
|
||||
if (cdata.is_stop_requested()) {
|
||||
// Compaction manager will catch this exception and re-schedule the compaction.
|
||||
@@ -1799,10 +1699,9 @@ static future<compaction_result> scrub_sstables_validate_mode(sstables::compacti
|
||||
clogger.info("Finished scrubbing in validate mode {} - sstable is {}", sst->get_filename(), validation_errors == 0 ? "valid" : "invalid");
|
||||
}
|
||||
|
||||
using scrub = sstables::compaction_type_options::scrub;
|
||||
if (validation_errors != 0 && descriptor.options.as<scrub>().quarantine_sstables == scrub::quarantine_invalid_sstables::yes) {
|
||||
if (validation_errors != 0) {
|
||||
for (auto& sst : descriptor.sstables) {
|
||||
co_await sst->change_state(sstables::sstable_state::quarantine);
|
||||
co_await sst->change_state(sstables::quarantine_dir);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1815,15 +1714,8 @@ static future<compaction_result> scrub_sstables_validate_mode(sstables::compacti
|
||||
};
|
||||
}
|
||||
|
||||
future<compaction_result> scrub_sstables_validate_mode(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s, compaction_progress_monitor& progress_monitor) {
|
||||
progress_monitor.set_generator(std::make_unique<compaction_read_monitor_generator>(table_s, use_backlog_tracker::no));
|
||||
auto d = defer([&] { progress_monitor.reset_generator(); });
|
||||
auto res = co_await scrub_sstables_validate_mode(descriptor, cdata, table_s, *progress_monitor._generator);
|
||||
co_return res;
|
||||
}
|
||||
|
||||
future<compaction_result>
|
||||
compact_sstables(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s, compaction_progress_monitor& progress_monitor) {
|
||||
compact_sstables(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s) {
|
||||
if (descriptor.sstables.empty()) {
|
||||
return make_exception_future<compaction_result>(std::runtime_error(format("Called {} compaction with empty set on behalf of {}.{}",
|
||||
compaction_name(descriptor.options.type()), table_s.schema()->ks_name(), table_s.schema()->cf_name())));
|
||||
@@ -1831,9 +1723,9 @@ compact_sstables(sstables::compaction_descriptor descriptor, compaction_data& cd
|
||||
if (descriptor.options.type() == compaction_type::Scrub
|
||||
&& std::get<compaction_type_options::scrub>(descriptor.options.options()).operation_mode == compaction_type_options::scrub::mode::validate) {
|
||||
// Bypass the usual compaction machinery for dry-mode scrub
|
||||
return scrub_sstables_validate_mode(std::move(descriptor), cdata, table_s, progress_monitor);
|
||||
return scrub_sstables_validate_mode(std::move(descriptor), cdata, table_s);
|
||||
}
|
||||
return compaction::run(make_compaction(table_s, std::move(descriptor), cdata, progress_monitor));
|
||||
return compaction::run(make_compaction(table_s, std::move(descriptor), cdata));
|
||||
}
|
||||
|
||||
std::unordered_set<sstables::shared_sstable>
|
||||
@@ -1851,7 +1743,7 @@ get_fully_expired_sstables(const table_state& table_s, const std::vector<sstable
|
||||
int64_t min_timestamp = std::numeric_limits<int64_t>::max();
|
||||
|
||||
for (auto& sstable : overlapping) {
|
||||
auto gc_before = sstable->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
|
||||
auto gc_before = sstable->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state());
|
||||
if (sstable->get_max_local_deletion_time() >= gc_before) {
|
||||
min_timestamp = std::min(min_timestamp, sstable->get_stats_metadata().min_timestamp);
|
||||
}
|
||||
@@ -1870,7 +1762,7 @@ get_fully_expired_sstables(const table_state& table_s, const std::vector<sstable
|
||||
|
||||
// SStables that do not contain live data is added to list of possibly expired sstables.
|
||||
for (auto& candidate : compacting) {
|
||||
auto gc_before = candidate->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
|
||||
auto gc_before = candidate->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state());
|
||||
clogger.debug("Checking if candidate of generation {} and max_deletion_time {} is expired, gc_before is {}",
|
||||
candidate->generation(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
|
||||
// A fully expired sstable which has an ancestor undeleted shouldn't be compacted because
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
#include "compaction/compaction_descriptor.hh"
|
||||
#include "gc_clock.hh"
|
||||
#include "compaction_weight_registration.hh"
|
||||
#include "service/priority_manager.hh"
|
||||
#include "utils/UUID.hh"
|
||||
#include "utils/pretty_printers.hh"
|
||||
#include "table_state.hh"
|
||||
#include <seastar/core/thread.hh>
|
||||
#include <seastar/core/abort_source.hh>
|
||||
@@ -25,6 +25,21 @@ namespace sstables {
|
||||
|
||||
bool is_eligible_for_compaction(const sstables::shared_sstable& sst) noexcept;
|
||||
|
||||
class pretty_printed_data_size {
|
||||
uint64_t _size;
|
||||
public:
|
||||
pretty_printed_data_size(uint64_t size) : _size(size) {}
|
||||
friend std::ostream& operator<<(std::ostream&, pretty_printed_data_size);
|
||||
};
|
||||
|
||||
class pretty_printed_throughput {
|
||||
uint64_t _size;
|
||||
std::chrono::duration<float> _duration;
|
||||
public:
|
||||
pretty_printed_throughput(uint64_t size, std::chrono::duration<float> dur) : _size(size), _duration(std::move(dur)) {}
|
||||
friend std::ostream& operator<<(std::ostream&, pretty_printed_throughput);
|
||||
};
|
||||
|
||||
// Return the name of the compaction type
|
||||
// as used over the REST api, e.g. "COMPACTION" or "CLEANUP".
|
||||
sstring compaction_name(compaction_type type);
|
||||
@@ -48,7 +63,6 @@ struct compaction_info {
|
||||
};
|
||||
|
||||
struct compaction_data {
|
||||
uint64_t compaction_size = 0;
|
||||
uint64_t total_partitions = 0;
|
||||
uint64_t total_keys_written = 0;
|
||||
sstring stop_requested;
|
||||
@@ -101,27 +115,12 @@ struct compaction_result {
|
||||
compaction_stats stats;
|
||||
};
|
||||
|
||||
class read_monitor_generator;
|
||||
|
||||
class compaction_progress_monitor {
|
||||
std::unique_ptr<read_monitor_generator> _generator = nullptr;
|
||||
uint64_t _progress = 0;
|
||||
public:
|
||||
void set_generator(std::unique_ptr<read_monitor_generator> generator);
|
||||
void reset_generator();
|
||||
// Returns number of bytes processed with _generator.
|
||||
uint64_t get_progress() const;
|
||||
|
||||
friend class compaction;
|
||||
friend future<compaction_result> scrub_sstables_validate_mode(sstables::compaction_descriptor, compaction_data&, table_state&, compaction_progress_monitor&);
|
||||
};
|
||||
|
||||
// Compact a list of N sstables into M sstables.
|
||||
// Returns info about the finished compaction, which includes vector to new sstables.
|
||||
//
|
||||
// compaction_descriptor is responsible for specifying the type of compaction, and influencing
|
||||
// compaction behavior through its available member fields.
|
||||
future<compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s, compaction_progress_monitor& progress_monitor);
|
||||
future<compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, compaction_data& cdata, table_state& table_s);
|
||||
|
||||
// Return list of expired sstables for column family cf.
|
||||
// A sstable is fully expired *iff* its max_local_deletion_time precedes gc_before and its
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <memory>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include "sstables/shared_sstable.hh"
|
||||
#include "sstables/progress_monitor.hh"
|
||||
#include "timestamp.hh"
|
||||
|
||||
class compaction_backlog_manager;
|
||||
@@ -59,20 +60,18 @@ public:
|
||||
using ongoing_compactions = std::unordered_map<sstables::shared_sstable, backlog_read_progress_manager*>;
|
||||
|
||||
struct impl {
|
||||
// FIXME: Should provide strong exception safety guarantees
|
||||
virtual void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts) = 0;
|
||||
virtual void replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) = 0;
|
||||
virtual double backlog(const ongoing_writes& ow, const ongoing_compactions& oc) const = 0;
|
||||
virtual ~impl() { }
|
||||
};
|
||||
|
||||
compaction_backlog_tracker(std::unique_ptr<impl> impl) : _impl(std::move(impl)) {}
|
||||
compaction_backlog_tracker(compaction_backlog_tracker&&);
|
||||
compaction_backlog_tracker& operator=(compaction_backlog_tracker&&) = delete;
|
||||
compaction_backlog_tracker& operator=(compaction_backlog_tracker&&) noexcept;
|
||||
compaction_backlog_tracker(const compaction_backlog_tracker&) = delete;
|
||||
~compaction_backlog_tracker();
|
||||
|
||||
double backlog() const;
|
||||
// FIXME: Should provide strong exception safety guarantees
|
||||
void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts);
|
||||
void register_partially_written_sstable(sstables::shared_sstable sst, backlog_write_progress_manager& wp);
|
||||
void register_compacting_sstable(sstables::shared_sstable sst, backlog_read_progress_manager& rp);
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "sstables/sstable_set.hh"
|
||||
#include "utils/UUID.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "compaction_weight_registration.hh"
|
||||
#include "compaction_fwd.hh"
|
||||
|
||||
namespace sstables {
|
||||
@@ -72,12 +73,6 @@ public:
|
||||
only, // scrub only quarantined sstables
|
||||
};
|
||||
quarantine_mode quarantine_operation_mode = quarantine_mode::include;
|
||||
|
||||
using quarantine_invalid_sstables = bool_class<class quarantine_invalid_sstables_tag>;
|
||||
|
||||
// Should invalid sstables be moved into quarantine.
|
||||
// Only applies to validate-mode.
|
||||
quarantine_invalid_sstables quarantine_sstables = quarantine_invalid_sstables::yes;
|
||||
};
|
||||
struct reshard {
|
||||
};
|
||||
@@ -114,8 +109,8 @@ public:
|
||||
return compaction_type_options(upgrade{});
|
||||
}
|
||||
|
||||
static compaction_type_options make_scrub(scrub::mode mode, scrub::quarantine_invalid_sstables quarantine_sstables = scrub::quarantine_invalid_sstables::yes) {
|
||||
return compaction_type_options(scrub{.operation_mode = mode, .quarantine_sstables = quarantine_sstables});
|
||||
static compaction_type_options make_scrub(scrub::mode mode) {
|
||||
return compaction_type_options(scrub{mode});
|
||||
}
|
||||
|
||||
template <typename... Visitor>
|
||||
@@ -123,11 +118,6 @@ public:
|
||||
return std::visit(std::forward<Visitor>(visitor)..., _options);
|
||||
}
|
||||
|
||||
template <typename OptionType>
|
||||
const auto& as() const {
|
||||
return std::get<OptionType>(_options);
|
||||
}
|
||||
|
||||
const options_variant& options() const { return _options; }
|
||||
|
||||
compaction_type type() const;
|
||||
@@ -161,12 +151,12 @@ struct compaction_descriptor {
|
||||
compaction_type_options options = compaction_type_options::make_regular();
|
||||
// If engaged, compaction will cleanup the input sstables by skipping non-owned ranges.
|
||||
compaction::owned_ranges_ptr owned_ranges;
|
||||
// Required for reshard compaction.
|
||||
const dht::sharder* sharder;
|
||||
|
||||
compaction_sstable_creator_fn creator;
|
||||
compaction_sstable_replacer_fn replacer;
|
||||
|
||||
::io_priority_class io_priority = default_priority_class();
|
||||
|
||||
// Denotes if this compaction task is comprised solely of completely expired SSTables
|
||||
sstables::has_only_fully_expired has_only_fully_expired = has_only_fully_expired::no;
|
||||
|
||||
@@ -176,6 +166,7 @@ struct compaction_descriptor {
|
||||
static constexpr uint64_t default_max_sstable_bytes = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
explicit compaction_descriptor(std::vector<sstables::shared_sstable> sstables,
|
||||
::io_priority_class io_priority,
|
||||
int level = default_level,
|
||||
uint64_t max_sstable_bytes = default_max_sstable_bytes,
|
||||
run_id run_identifier = run_id::create_random_id(),
|
||||
@@ -187,15 +178,18 @@ struct compaction_descriptor {
|
||||
, run_identifier(run_identifier)
|
||||
, options(options)
|
||||
, owned_ranges(std::move(owned_ranges_))
|
||||
, io_priority(io_priority)
|
||||
{}
|
||||
|
||||
explicit compaction_descriptor(sstables::has_only_fully_expired has_only_fully_expired,
|
||||
std::vector<sstables::shared_sstable> sstables)
|
||||
std::vector<sstables::shared_sstable> sstables,
|
||||
::io_priority_class io_priority)
|
||||
: sstables(std::move(sstables))
|
||||
, level(default_level)
|
||||
, max_sstable_bytes(default_max_sstable_bytes)
|
||||
, run_identifier(run_id::create_random_id())
|
||||
, options(compaction_type_options::make_regular())
|
||||
, io_priority(io_priority)
|
||||
, has_only_fully_expired(has_only_fully_expired)
|
||||
{}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -31,8 +31,8 @@
|
||||
#include <functional>
|
||||
#include <algorithm>
|
||||
#include "compaction.hh"
|
||||
#include "compaction_weight_registration.hh"
|
||||
#include "compaction_backlog_manager.hh"
|
||||
#include "compaction/compaction_descriptor.hh"
|
||||
#include "compaction/task_manager_module.hh"
|
||||
#include "compaction_state.hh"
|
||||
#include "strategy_control.hh"
|
||||
@@ -46,14 +46,14 @@ class system_keyspace;
|
||||
class compaction_history_entry;
|
||||
}
|
||||
|
||||
class compacting_sstable_registration;
|
||||
|
||||
class repair_history_map {
|
||||
public:
|
||||
boost::icl::interval_map<dht::token, gc_clock::time_point, boost::icl::partial_absorber, std::less, boost::icl::inplace_max> map;
|
||||
};
|
||||
|
||||
namespace compaction {
|
||||
using throw_if_stopping = bool_class<struct throw_if_stopping_tag>;
|
||||
|
||||
class compaction_task_executor;
|
||||
class sstables_task_executor;
|
||||
class major_compaction_task_executor;
|
||||
@@ -64,6 +64,8 @@ class rewrite_sstables_compaction_task_executor;
|
||||
class cleanup_sstables_compaction_task_executor;
|
||||
class validate_sstables_compaction_task_executor;
|
||||
}
|
||||
class compaction_manager_test_task_executor;
|
||||
|
||||
// Compaction manager provides facilities to submit and track compaction jobs on
|
||||
// behalf of existing tables.
|
||||
class compaction_manager {
|
||||
@@ -161,21 +163,7 @@ private:
|
||||
per_table_history_maps _repair_history_maps;
|
||||
tombstone_gc_state _tombstone_gc_state;
|
||||
private:
|
||||
// Requires task->_compaction_state.gate to be held and task to be registered in _tasks.
|
||||
future<compaction_stats_opt> perform_task(shared_ptr<compaction::compaction_task_executor> task, throw_if_stopping do_throw_if_stopping);
|
||||
|
||||
// Return nullopt if compaction cannot be started
|
||||
std::optional<gate::holder> start_compaction(table_state& t);
|
||||
|
||||
// parent_info set to std::nullopt means that task manager should not register this task executor.
|
||||
// To create a task manager task with no parent, parent_info argument should contain empty task_info.
|
||||
template<typename TaskExecutor, typename... Args>
|
||||
requires std::is_base_of_v<compaction_task_executor, TaskExecutor> &&
|
||||
std::is_base_of_v<compaction_task_impl, TaskExecutor> &&
|
||||
requires (compaction_manager& cm, throw_if_stopping do_throw_if_stopping, Args&&... args) {
|
||||
{TaskExecutor(cm, do_throw_if_stopping, std::forward<Args>(args)...)} -> std::same_as<TaskExecutor>;
|
||||
}
|
||||
future<compaction_manager::compaction_stats_opt> perform_compaction(throw_if_stopping do_throw_if_stopping, std::optional<tasks::task_info> parent_info, Args&&... args);
|
||||
future<compaction_stats_opt> perform_task(shared_ptr<compaction::compaction_task_executor>);
|
||||
|
||||
future<> stop_tasks(std::vector<shared_ptr<compaction::compaction_task_executor>> tasks, sstring reason);
|
||||
future<> update_throughput(uint32_t value_mbs);
|
||||
@@ -194,20 +182,17 @@ private:
|
||||
// Get candidates for compaction strategy, which are all sstables but the ones being compacted.
|
||||
std::vector<sstables::shared_sstable> get_candidates(compaction::table_state& t) const;
|
||||
|
||||
bool eligible_for_compaction(const sstables::shared_sstable& sstable) const;
|
||||
bool eligible_for_compaction(const sstables::frozen_sstable_run& sstable_run) const;
|
||||
|
||||
template <std::ranges::range Range>
|
||||
requires std::convertible_to<std::ranges::range_value_t<Range>, sstables::shared_sstable> || std::convertible_to<std::ranges::range_value_t<Range>, sstables::frozen_sstable_run>
|
||||
std::vector<std::ranges::range_value_t<Range>> get_candidates(table_state& t, const Range& sstables) const;
|
||||
requires std::convertible_to<std::ranges::range_value_t<Range>, sstables::shared_sstable>
|
||||
std::vector<sstables::shared_sstable> get_candidates(table_state& t, const Range& sstables) const;
|
||||
|
||||
template <std::ranges::range Range>
|
||||
requires std::same_as<std::ranges::range_value_t<Range>, sstables::shared_sstable>
|
||||
void register_compacting_sstables(const Range& range);
|
||||
template <typename Iterator, typename Sentinel>
|
||||
requires std::same_as<Sentinel, Iterator> || std::sentinel_for<Sentinel, Iterator>
|
||||
void register_compacting_sstables(Iterator first, Sentinel last);
|
||||
|
||||
template <std::ranges::range Range>
|
||||
requires std::same_as<std::ranges::range_value_t<Range>, sstables::shared_sstable>
|
||||
void deregister_compacting_sstables(const Range& range);
|
||||
template <typename Iterator, typename Sentinel>
|
||||
requires std::same_as<Sentinel, Iterator> || std::sentinel_for<Sentinel, Iterator>
|
||||
void deregister_compacting_sstables(Iterator first, Sentinel last);
|
||||
|
||||
// gets the table's compaction state
|
||||
// throws std::out_of_range exception if not found.
|
||||
@@ -226,7 +211,7 @@ private:
|
||||
// similar-sized compaction.
|
||||
void postpone_compaction_for_table(compaction::table_state* t);
|
||||
|
||||
future<compaction_stats_opt> perform_sstable_scrub_validate_mode(compaction::table_state& t, std::optional<tasks::task_info> info);
|
||||
future<compaction_stats_opt> perform_sstable_scrub_validate_mode(compaction::table_state& t);
|
||||
future<> update_static_shares(float shares);
|
||||
|
||||
using get_candidates_func = std::function<future<std::vector<sstables::shared_sstable>>()>;
|
||||
@@ -234,11 +219,10 @@ private:
|
||||
// Guarantees that a maintenance task, e.g. cleanup, will be performed on all files available at the time
|
||||
// by retrieving set of candidates only after all compactions for table T were stopped, if any.
|
||||
template<typename TaskType, typename... Args>
|
||||
requires std::derived_from<TaskType, compaction_task_executor> &&
|
||||
std::derived_from<TaskType, compaction_task_impl>
|
||||
future<compaction_manager::compaction_stats_opt> perform_task_on_all_files(std::optional<tasks::task_info> info, table_state& t, sstables::compaction_type_options options, owned_ranges_ptr owned_ranges_ptr, get_candidates_func get_func, Args... args);
|
||||
requires std::derived_from<TaskType, compaction::compaction_task_executor>
|
||||
future<compaction_stats_opt> perform_task_on_all_files(compaction::table_state& t, sstables::compaction_type_options options, owned_ranges_ptr, get_candidates_func, Args... args);
|
||||
|
||||
future<compaction_stats_opt> rewrite_sstables(compaction::table_state& t, sstables::compaction_type_options options, owned_ranges_ptr, get_candidates_func, std::optional<tasks::task_info> info, can_purge_tombstones can_purge = can_purge_tombstones::yes);
|
||||
future<compaction_stats_opt> rewrite_sstables(compaction::table_state& t, sstables::compaction_type_options options, owned_ranges_ptr, get_candidates_func, can_purge_tombstones can_purge = can_purge_tombstones::yes);
|
||||
|
||||
// Stop all fibers, without waiting. Safe to be called multiple times.
|
||||
void do_stop() noexcept;
|
||||
@@ -310,7 +294,7 @@ public:
|
||||
|
||||
// Submit a table to be off-strategy compacted.
|
||||
// Returns true iff off-strategy compaction was required and performed.
|
||||
future<bool> perform_offstrategy(compaction::table_state& t, std::optional<tasks::task_info> info);
|
||||
future<bool> perform_offstrategy(compaction::table_state& t);
|
||||
|
||||
// Submit a table to be cleaned up and wait for its termination.
|
||||
//
|
||||
@@ -319,23 +303,21 @@ public:
|
||||
// Cleanup is about discarding keys that are no longer relevant for a
|
||||
// given sstable, e.g. after node loses part of its token range because
|
||||
// of a newly added node.
|
||||
future<> perform_cleanup(owned_ranges_ptr sorted_owned_ranges, compaction::table_state& t, std::optional<tasks::task_info> info);
|
||||
future<> perform_cleanup(owned_ranges_ptr sorted_owned_ranges, compaction::table_state& t);
|
||||
private:
|
||||
future<> try_perform_cleanup(owned_ranges_ptr sorted_owned_ranges, compaction::table_state& t, std::optional<tasks::task_info> info);
|
||||
future<> try_perform_cleanup(owned_ranges_ptr sorted_owned_ranges, compaction::table_state& t);
|
||||
|
||||
// Add sst to or remove it from the respective compaction_state.sstables_requiring_cleanup set.
|
||||
bool update_sstable_cleanup_state(table_state& t, const sstables::shared_sstable& sst, const dht::token_range_vector& sorted_owned_ranges);
|
||||
|
||||
future<> on_compaction_completion(table_state& t, sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy);
|
||||
public:
|
||||
// Submit a table to be upgraded and wait for its termination.
|
||||
future<> perform_sstable_upgrade(owned_ranges_ptr sorted_owned_ranges, compaction::table_state& t, bool exclude_current_version, std::optional<tasks::task_info> info = std::nullopt);
|
||||
future<> perform_sstable_upgrade(owned_ranges_ptr sorted_owned_ranges, compaction::table_state& t, bool exclude_current_version);
|
||||
|
||||
// Submit a table to be scrubbed and wait for its termination.
|
||||
future<compaction_stats_opt> perform_sstable_scrub(compaction::table_state& t, sstables::compaction_type_options::scrub opts, std::optional<tasks::task_info> info = std::nullopt);
|
||||
future<compaction_stats_opt> perform_sstable_scrub(compaction::table_state& t, sstables::compaction_type_options::scrub opts);
|
||||
|
||||
// Submit a table for major compaction.
|
||||
future<> perform_major_compaction(compaction::table_state& t, std::optional<tasks::task_info> info = std::nullopt);
|
||||
future<> perform_major_compaction(compaction::table_state& t);
|
||||
|
||||
|
||||
// Run a custom job for a given table, defined by a function
|
||||
@@ -345,7 +327,7 @@ public:
|
||||
// parameter type is the compaction type the operation can most closely be
|
||||
// associated with, use compaction_type::Compaction, if none apply.
|
||||
// parameter job is a function that will carry the operation
|
||||
future<> run_custom_job(compaction::table_state& s, sstables::compaction_type type, const char *desc, noncopyable_function<future<>(sstables::compaction_data&, sstables::compaction_progress_monitor&)> job, std::optional<tasks::task_info> info, throw_if_stopping do_throw_if_stopping);
|
||||
future<> run_custom_job(compaction::table_state& s, sstables::compaction_type type, const char *desc, noncopyable_function<future<>(sstables::compaction_data&)> job);
|
||||
|
||||
class compaction_reenabler {
|
||||
compaction_manager& _cm;
|
||||
@@ -433,7 +415,6 @@ public:
|
||||
|
||||
// checks if the sstable is in the respective compaction_state.sstables_requiring_cleanup set.
|
||||
bool requires_cleanup(table_state& t, const sstables::shared_sstable& sst) const;
|
||||
const std::unordered_set<sstables::shared_sstable>& sstables_requiring_cleanup(table_state& t) const;
|
||||
|
||||
friend class compacting_sstable_registration;
|
||||
friend class compaction_weight_registration;
|
||||
@@ -448,11 +429,12 @@ public:
|
||||
friend class compaction::rewrite_sstables_compaction_task_executor;
|
||||
friend class compaction::cleanup_sstables_compaction_task_executor;
|
||||
friend class compaction::validate_sstables_compaction_task_executor;
|
||||
friend class compaction_manager_test_task_executor;
|
||||
};
|
||||
|
||||
namespace compaction {
|
||||
|
||||
class compaction_task_executor : public enable_shared_from_this<compaction_task_executor> {
|
||||
class compaction_task_executor {
|
||||
public:
|
||||
enum class state {
|
||||
none, // initial and final state
|
||||
@@ -460,55 +442,42 @@ public:
|
||||
// counted in compaction_manager::stats::pending_tasks
|
||||
active, // task initiated active compaction, may alternate with pending
|
||||
// counted in compaction_manager::stats::active_tasks
|
||||
done, // task completed successfully (may transition only to state::none, or
|
||||
// state::pending for regular compaction)
|
||||
done, // task completed successfully (may transition only to state::none)
|
||||
// counted in compaction_manager::stats::completed_tasks
|
||||
postponed, // task was postponed (may transition only to state::none)
|
||||
// represented by the postponed_compactions metric
|
||||
failed, // task failed (may transition only to state::none)
|
||||
// counted in compaction_manager::stats::errors
|
||||
};
|
||||
static std::string_view to_string(state);
|
||||
protected:
|
||||
compaction_manager& _cm;
|
||||
::compaction::table_state* _compacting_table = nullptr;
|
||||
compaction::compaction_state& _compaction_state;
|
||||
sstables::compaction_data _compaction_data;
|
||||
state _state = state::none;
|
||||
throw_if_stopping _do_throw_if_stopping;
|
||||
sstables::compaction_progress_monitor _progress_monitor;
|
||||
|
||||
private:
|
||||
shared_future<compaction_manager::compaction_stats_opt> _compaction_done = make_ready_future<compaction_manager::compaction_stats_opt>();
|
||||
exponential_backoff_retry _compaction_retry = exponential_backoff_retry(std::chrono::seconds(5), std::chrono::seconds(300));
|
||||
sstables::compaction_type _type;
|
||||
sstables::run_id _output_run_identifier;
|
||||
gate::holder _gate_holder;
|
||||
sstring _description;
|
||||
compaction_manager::compaction_stats_opt _stats = std::nullopt;
|
||||
|
||||
public:
|
||||
explicit compaction_task_executor(compaction_manager& mgr, throw_if_stopping do_throw_if_stopping, ::compaction::table_state* t, sstables::compaction_type type, sstring desc);
|
||||
explicit compaction_task_executor(compaction_manager& mgr, ::compaction::table_state* t, sstables::compaction_type type, sstring desc);
|
||||
|
||||
compaction_task_executor(compaction_task_executor&&) = delete;
|
||||
compaction_task_executor(const compaction_task_executor&) = delete;
|
||||
|
||||
virtual ~compaction_task_executor() = default;
|
||||
|
||||
// called when a compaction replaces the exhausted sstables with the new set
|
||||
struct on_replacement {
|
||||
virtual ~on_replacement() {}
|
||||
// called after the replacement completes
|
||||
// @param sstables the old sstable which are replaced in this replacement
|
||||
virtual void on_removal(const std::vector<sstables::shared_sstable>& sstables) = 0;
|
||||
// called before the replacement happens
|
||||
// @param sstables the new sstables to be added to the table's sstable set
|
||||
virtual void on_addition(const std::vector<sstables::shared_sstable>& sstables) = 0;
|
||||
};
|
||||
virtual ~compaction_task_executor();
|
||||
|
||||
protected:
|
||||
future<> perform();
|
||||
|
||||
virtual future<compaction_manager::compaction_stats_opt> do_run() = 0;
|
||||
|
||||
using throw_if_stopping = bool_class<struct throw_if_stopping_tag>;
|
||||
|
||||
state switch_state(state new_state);
|
||||
|
||||
future<semaphore_units<named_semaphore_exception_factory>> acquire_semaphore(named_semaphore& sem, size_t units = 1);
|
||||
@@ -525,27 +494,24 @@ protected:
|
||||
// otherwise, returns stop_iteration::no after sleep for exponential retry.
|
||||
future<stop_iteration> maybe_retry(std::exception_ptr err, bool throw_on_abort = false);
|
||||
|
||||
future<sstables::compaction_result> compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement&,
|
||||
// Compacts set of SSTables according to the descriptor.
|
||||
using release_exhausted_func_t = std::function<void(const std::vector<sstables::shared_sstable>& exhausted_sstables)>;
|
||||
future<sstables::compaction_result> compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted,
|
||||
compaction_manager::can_purge_tombstones can_purge = compaction_manager::can_purge_tombstones::yes);
|
||||
future<sstables::compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted,
|
||||
compaction_manager::can_purge_tombstones can_purge = compaction_manager::can_purge_tombstones::yes);
|
||||
future<sstables::compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement&,
|
||||
compaction_manager::can_purge_tombstones can_purge = compaction_manager::can_purge_tombstones::yes,
|
||||
sstables::offstrategy offstrategy = sstables::offstrategy::no);
|
||||
future<> update_history(::compaction::table_state& t, const sstables::compaction_result& res, const sstables::compaction_data& cdata);
|
||||
bool should_update_history(sstables::compaction_type ct) {
|
||||
return ct == sstables::compaction_type::Compaction;
|
||||
}
|
||||
public:
|
||||
compaction_manager::compaction_stats_opt get_stats() const noexcept {
|
||||
return _stats;
|
||||
}
|
||||
|
||||
future<compaction_manager::compaction_stats_opt> run_compaction() noexcept;
|
||||
future<compaction_manager::compaction_stats_opt> run() noexcept;
|
||||
|
||||
const ::compaction::table_state* compacting_table() const noexcept {
|
||||
return _compacting_table;
|
||||
}
|
||||
|
||||
sstables::compaction_type compaction_type() const noexcept {
|
||||
sstables::compaction_type type() const noexcept {
|
||||
return _type;
|
||||
}
|
||||
|
||||
@@ -571,46 +537,27 @@ public:
|
||||
const sstring& description() const noexcept {
|
||||
return _description;
|
||||
}
|
||||
private:
|
||||
// Before _compaction_done is set in compaction_task_executor::run_compaction(), compaction_done() returns ready future.
|
||||
|
||||
future<compaction_manager::compaction_stats_opt> compaction_done() noexcept {
|
||||
return _compaction_done.get_future();
|
||||
}
|
||||
public:
|
||||
|
||||
bool stopping() const noexcept {
|
||||
return _compaction_data.abort.abort_requested();
|
||||
}
|
||||
|
||||
void stop_compaction(sstring reason) noexcept;
|
||||
void stop(sstring reason) noexcept;
|
||||
|
||||
sstables::compaction_stopped_exception make_compaction_stopped_exception() const;
|
||||
|
||||
template<typename TaskExecutor, typename... Args>
|
||||
requires std::is_base_of_v<compaction_task_executor, TaskExecutor> &&
|
||||
std::is_base_of_v<compaction_task_impl, TaskExecutor> &&
|
||||
requires (compaction_manager& cm, throw_if_stopping do_throw_if_stopping, Args&&... args) {
|
||||
{TaskExecutor(cm, do_throw_if_stopping, std::forward<Args>(args)...)} -> std::same_as<TaskExecutor>;
|
||||
}
|
||||
friend future<compaction_manager::compaction_stats_opt> compaction_manager::perform_compaction(throw_if_stopping do_throw_if_stopping, std::optional<tasks::task_info> parent_info, Args&&... args);
|
||||
friend future<compaction_manager::compaction_stats_opt> compaction_manager::perform_task(shared_ptr<compaction_task_executor> task, throw_if_stopping do_throw_if_stopping);
|
||||
friend fmt::formatter<compaction_task_executor>;
|
||||
friend future<> compaction_manager::stop_tasks(std::vector<shared_ptr<compaction_task_executor>> tasks, sstring reason);
|
||||
std::string describe() const;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, compaction::compaction_task_executor::state s);
|
||||
std::ostream& operator<<(std::ostream& os, const compaction::compaction_task_executor& task);
|
||||
|
||||
}
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<compaction::compaction_task_executor::state> {
|
||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||
auto format(compaction::compaction_task_executor::state c, fmt::format_context& ctx) const -> decltype(ctx.out());
|
||||
};
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<compaction::compaction_task_executor> {
|
||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||
auto format(const compaction::compaction_task_executor& ex, fmt::format_context& ctx) const -> decltype(ctx.out());
|
||||
};
|
||||
|
||||
bool needs_cleanup(const sstables::shared_sstable& sst, const dht::token_range_vector& owned_ranges);
|
||||
|
||||
// Return all sstables but those that are off-strategy like the ones in maintenance set and staging dir.
|
||||
|
||||
@@ -32,7 +32,7 @@ struct compaction_state {
|
||||
// Signaled whenever a compaction task completes.
|
||||
condition_variable compaction_done;
|
||||
|
||||
std::optional<compaction_backlog_tracker> backlog_tracker;
|
||||
compaction_backlog_tracker backlog_tracker;
|
||||
|
||||
std::unordered_set<sstables::shared_sstable> sstables_requiring_cleanup;
|
||||
compaction::owned_ranges_ptr owned_ranges_ptr;
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include "seastar/core/on_internal_error.hh"
|
||||
#include "sstables/shared_sstable.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "compaction.hh"
|
||||
#include "compaction_strategy.hh"
|
||||
@@ -26,6 +24,7 @@
|
||||
#include <boost/range/adaptors.hpp>
|
||||
#include <boost/algorithm/cxx11/any_of.hpp>
|
||||
#include "size_tiered_compaction_strategy.hh"
|
||||
#include "date_tiered_compaction_strategy.hh"
|
||||
#include "leveled_compaction_strategy.hh"
|
||||
#include "time_window_compaction_strategy.hh"
|
||||
#include "backlog_controller.hh"
|
||||
@@ -33,25 +32,26 @@
|
||||
#include "size_tiered_backlog_tracker.hh"
|
||||
#include "leveled_manifest.hh"
|
||||
|
||||
logging::logger date_tiered_manifest::logger = logging::logger("DateTieredCompactionStrategy");
|
||||
logging::logger leveled_manifest::logger("LeveledManifest");
|
||||
|
||||
namespace sstables {
|
||||
|
||||
compaction_descriptor compaction_strategy_impl::make_major_compaction_job(std::vector<sstables::shared_sstable> candidates, int level, uint64_t max_sstable_bytes) {
|
||||
// run major compaction in maintenance priority
|
||||
return compaction_descriptor(std::move(candidates), level, max_sstable_bytes);
|
||||
return compaction_descriptor(std::move(candidates), service::get_local_streaming_priority(), level, max_sstable_bytes);
|
||||
}
|
||||
|
||||
std::vector<compaction_descriptor> compaction_strategy_impl::get_cleanup_compaction_jobs(table_state& table_s, std::vector<shared_sstable> candidates) const {
|
||||
// The default implementation is suboptimal and causes the writeamp problem described issue in #10097.
|
||||
// The compaction strategy relying on it should strive to implement its own method, to make cleanup bucket aware.
|
||||
return boost::copy_range<std::vector<compaction_descriptor>>(candidates | boost::adaptors::transformed([] (const shared_sstable& sst) {
|
||||
return compaction_descriptor({ sst },
|
||||
return compaction_descriptor({ sst }, service::get_local_compaction_priority(),
|
||||
sst->get_sstable_level(), sstables::compaction_descriptor::default_max_sstable_bytes, sst->run_identifier());
|
||||
}));
|
||||
}
|
||||
|
||||
bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const table_state& t) {
|
||||
bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const tombstone_gc_state& gc_state) {
|
||||
if (_disable_tombstone_compaction) {
|
||||
return false;
|
||||
}
|
||||
@@ -62,7 +62,7 @@ bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& s
|
||||
if (db_clock::now()-_tombstone_compaction_interval < sst->data_file_write_time()) {
|
||||
return false;
|
||||
}
|
||||
auto gc_before = sst->get_gc_before_for_drop_estimation(compaction_time, t.get_tombstone_gc_state(), t.schema());
|
||||
auto gc_before = sst->get_gc_before_for_drop_estimation(compaction_time, gc_state);
|
||||
return sst->estimate_droppable_tombstone_ratio(gc_before) >= _tombstone_threshold;
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ reader_consumer_v2 compaction_strategy_impl::make_interposer_consumer(const muta
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
compaction_strategy_impl::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
||||
compaction_strategy_impl::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const {
|
||||
return compaction_descriptor();
|
||||
}
|
||||
|
||||
@@ -87,96 +87,17 @@ std::optional<sstring> compaction_strategy_impl::get_value(const std::map<sstrin
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void compaction_strategy_impl::validate_min_max_threshold(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
auto min_threshold_key = "min_threshold", max_threshold_key = "max_threshold";
|
||||
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, min_threshold_key);
|
||||
auto min_threshold = cql3::statements::property_definitions::to_long(min_threshold_key, tmp_value, DEFAULT_MIN_COMPACTION_THRESHOLD);
|
||||
if (min_threshold < 2) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be bigger or equal to 2", min_threshold_key, min_threshold));
|
||||
}
|
||||
|
||||
tmp_value = compaction_strategy_impl::get_value(options, max_threshold_key);
|
||||
auto max_threshold = cql3::statements::property_definitions::to_long(max_threshold_key, tmp_value, DEFAULT_MAX_COMPACTION_THRESHOLD);
|
||||
if (max_threshold < 2) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be bigger or equal to 2", max_threshold_key, max_threshold));
|
||||
}
|
||||
|
||||
unchecked_options.erase(min_threshold_key);
|
||||
unchecked_options.erase(max_threshold_key);
|
||||
}
|
||||
|
||||
static double validate_tombstone_threshold(const std::map<sstring, sstring>& options) {
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, compaction_strategy_impl::TOMBSTONE_THRESHOLD_OPTION);
|
||||
auto tombstone_threshold = cql3::statements::property_definitions::to_double(compaction_strategy_impl::TOMBSTONE_THRESHOLD_OPTION, tmp_value, compaction_strategy_impl::DEFAULT_TOMBSTONE_THRESHOLD);
|
||||
if (tombstone_threshold < 0.0 || tombstone_threshold > 1.0) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be between 0.0 and 1.0", compaction_strategy_impl::TOMBSTONE_THRESHOLD_OPTION, tombstone_threshold));
|
||||
}
|
||||
return tombstone_threshold;
|
||||
}
|
||||
|
||||
static double validate_tombstone_threshold(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
auto tombstone_threshold = validate_tombstone_threshold(options);
|
||||
unchecked_options.erase(compaction_strategy_impl::TOMBSTONE_THRESHOLD_OPTION);
|
||||
return tombstone_threshold;
|
||||
}
|
||||
|
||||
static db_clock::duration validate_tombstone_compaction_interval(const std::map<sstring, sstring>& options) {
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, compaction_strategy_impl::TOMBSTONE_COMPACTION_INTERVAL_OPTION);
|
||||
auto interval = cql3::statements::property_definitions::to_long(compaction_strategy_impl::TOMBSTONE_COMPACTION_INTERVAL_OPTION, tmp_value, compaction_strategy_impl::DEFAULT_TOMBSTONE_COMPACTION_INTERVAL().count());
|
||||
auto tombstone_compaction_interval = db_clock::duration(std::chrono::seconds(interval));
|
||||
if (interval <= 0) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be positive", compaction_strategy_impl::TOMBSTONE_COMPACTION_INTERVAL_OPTION, tombstone_compaction_interval));
|
||||
}
|
||||
return tombstone_compaction_interval;
|
||||
}
|
||||
|
||||
static db_clock::duration validate_tombstone_compaction_interval(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
auto tombstone_compaction_interval = validate_tombstone_compaction_interval(options);
|
||||
unchecked_options.erase(compaction_strategy_impl::TOMBSTONE_COMPACTION_INTERVAL_OPTION);
|
||||
return tombstone_compaction_interval;
|
||||
}
|
||||
|
||||
void compaction_strategy_impl::validate_options_for_strategy_type(const std::map<sstring, sstring>& options, sstables::compaction_strategy_type type) {
|
||||
auto unchecked_options = options;
|
||||
compaction_strategy_impl::validate_options(options, unchecked_options);
|
||||
switch (type) {
|
||||
case compaction_strategy_type::size_tiered:
|
||||
size_tiered_compaction_strategy::validate_options(options, unchecked_options);
|
||||
break;
|
||||
case compaction_strategy_type::leveled:
|
||||
leveled_compaction_strategy::validate_options(options, unchecked_options);
|
||||
break;
|
||||
case compaction_strategy_type::time_window:
|
||||
time_window_compaction_strategy::validate_options(options, unchecked_options);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
unchecked_options.erase("class");
|
||||
if (!unchecked_options.empty()) {
|
||||
throw exceptions::configuration_exception(fmt::format("Invalid compaction strategy options {} for chosen strategy type", unchecked_options));
|
||||
}
|
||||
}
|
||||
|
||||
// options is a map of compaction strategy options and their values.
|
||||
// unchecked_options is an analogical map from which already checked options are deleted.
|
||||
// This helps making sure that only allowed options are being set.
|
||||
void compaction_strategy_impl::validate_options(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
validate_tombstone_threshold(options, unchecked_options);
|
||||
validate_tombstone_compaction_interval(options, unchecked_options);
|
||||
|
||||
auto it = options.find("enabled");
|
||||
if (it != options.end() && it->second != "true" && it->second != "false") {
|
||||
throw exceptions::configuration_exception(fmt::format("enabled value ({}) must be \"true\" or \"false\"", it->second));
|
||||
}
|
||||
unchecked_options.erase("enabled");
|
||||
}
|
||||
|
||||
compaction_strategy_impl::compaction_strategy_impl(const std::map<sstring, sstring>& options) {
|
||||
_tombstone_threshold = validate_tombstone_threshold(options);
|
||||
_tombstone_compaction_interval = validate_tombstone_compaction_interval(options);
|
||||
using namespace cql3::statements;
|
||||
|
||||
auto tmp_value = get_value(options, TOMBSTONE_THRESHOLD_OPTION);
|
||||
_tombstone_threshold = property_definitions::to_double(TOMBSTONE_THRESHOLD_OPTION, tmp_value, DEFAULT_TOMBSTONE_THRESHOLD);
|
||||
|
||||
tmp_value = get_value(options, TOMBSTONE_COMPACTION_INTERVAL_OPTION);
|
||||
auto interval = property_definitions::to_long(TOMBSTONE_COMPACTION_INTERVAL_OPTION, tmp_value, DEFAULT_TOMBSTONE_COMPACTION_INTERVAL().count());
|
||||
_tombstone_compaction_interval = db_clock::duration(std::chrono::seconds(interval));
|
||||
|
||||
// FIXME: validate options.
|
||||
}
|
||||
|
||||
} // namespace sstables
|
||||
@@ -188,7 +109,7 @@ size_tiered_backlog_tracker::compacted_backlog(const compaction_backlog_tracker:
|
||||
// A SSTable being compacted may not contribute to backlog if compaction strategy decided
|
||||
// to perform a low-efficiency compaction when system is under little load, or when user
|
||||
// performs major even though strategy is completely satisfied
|
||||
if (!_contrib.sstables.contains(crp.first)) {
|
||||
if (!_sstables_contributing_backlog.contains(crp.first)) {
|
||||
continue;
|
||||
}
|
||||
auto compacted = crp.second->compacted();
|
||||
@@ -198,11 +119,11 @@ size_tiered_backlog_tracker::compacted_backlog(const compaction_backlog_tracker:
|
||||
return in;
|
||||
}
|
||||
|
||||
// Provides strong exception safety guarantees.
|
||||
size_tiered_backlog_tracker::sstables_backlog_contribution size_tiered_backlog_tracker::calculate_sstables_backlog_contribution(const std::vector<sstables::shared_sstable>& all, const sstables::size_tiered_compaction_strategy_options& stcs_options) {
|
||||
sstables_backlog_contribution contrib;
|
||||
if (all.empty()) {
|
||||
return contrib;
|
||||
void size_tiered_backlog_tracker::refresh_sstables_backlog_contribution() {
|
||||
_sstables_backlog_contribution = 0.0f;
|
||||
_sstables_contributing_backlog = {};
|
||||
if (_all.empty()) {
|
||||
return;
|
||||
}
|
||||
using namespace sstables;
|
||||
|
||||
@@ -212,27 +133,25 @@ size_tiered_backlog_tracker::sstables_backlog_contribution size_tiered_backlog_t
|
||||
// in efficient jobs acting more aggressive than they really have to.
|
||||
// TODO: potentially switch to compaction manager's fan-in threshold, so to account for the dynamic
|
||||
// fan-in threshold behavior.
|
||||
const auto& newest_sst = std::ranges::max(all, std::less<generation_type>(), std::mem_fn(&sstable::generation));
|
||||
const auto& newest_sst = std::ranges::max(_all, std::less<generation_type>(), std::mem_fn(&sstable::generation));
|
||||
auto threshold = newest_sst->get_schema()->min_compaction_threshold();
|
||||
|
||||
for (auto& bucket : size_tiered_compaction_strategy::get_buckets(all, stcs_options)) {
|
||||
for (auto& bucket : size_tiered_compaction_strategy::get_buckets(boost::copy_range<std::vector<shared_sstable>>(_all), _stcs_options)) {
|
||||
if (!size_tiered_compaction_strategy::is_bucket_interesting(bucket, threshold)) {
|
||||
continue;
|
||||
}
|
||||
contrib.value += boost::accumulate(bucket | boost::adaptors::transformed([] (const shared_sstable& sst) -> double {
|
||||
_sstables_backlog_contribution += boost::accumulate(bucket | boost::adaptors::transformed([this] (const shared_sstable& sst) -> double {
|
||||
return sst->data_size() * log4(sst->data_size());
|
||||
}), double(0.0f));
|
||||
// Controller is disabled if exception is caught during add / remove calls, so not making any effort to make this exception safe
|
||||
contrib.sstables.insert(bucket.begin(), bucket.end());
|
||||
_sstables_contributing_backlog.insert(bucket.begin(), bucket.end());
|
||||
}
|
||||
|
||||
return contrib;
|
||||
}
|
||||
|
||||
double size_tiered_backlog_tracker::backlog(const compaction_backlog_tracker::ongoing_writes& ow, const compaction_backlog_tracker::ongoing_compactions& oc) const {
|
||||
inflight_component compacted = compacted_backlog(oc);
|
||||
|
||||
auto total_backlog_bytes = boost::accumulate(_contrib.sstables | boost::adaptors::transformed(std::mem_fn(&sstables::sstable::data_size)), uint64_t(0));
|
||||
auto total_backlog_bytes = boost::accumulate(_sstables_contributing_backlog | boost::adaptors::transformed(std::mem_fn(&sstables::sstable::data_size)), uint64_t(0));
|
||||
|
||||
// Bail out if effective backlog is zero, which happens in a small window where ongoing compaction exhausted
|
||||
// input files but is still sealing output files or doing managerial stuff like updating history table
|
||||
@@ -249,41 +168,26 @@ double size_tiered_backlog_tracker::backlog(const compaction_backlog_tracker::on
|
||||
auto effective_backlog_bytes = total_backlog_bytes - compacted.total_bytes;
|
||||
|
||||
// Sum of (Si - Ci) * log (Si) for all SSTables contributing backlog
|
||||
auto sstables_contribution = _contrib.value - compacted.contribution;
|
||||
auto sstables_contribution = _sstables_backlog_contribution - compacted.contribution;
|
||||
// This is subtracting ((Si - Ci) * log (Si)) from ((Si - Ci) * log(T)), yielding the final backlog
|
||||
auto b = (effective_backlog_bytes * log4(_total_bytes)) - sstables_contribution;
|
||||
return b > 0 ? b : 0;
|
||||
}
|
||||
|
||||
// Provides strong exception safety guarantees.
|
||||
void size_tiered_backlog_tracker::replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts) {
|
||||
auto tmp_all = _all;
|
||||
auto tmp_total_bytes = _total_bytes;
|
||||
tmp_all.reserve(_all.size() + new_ssts.size());
|
||||
|
||||
void size_tiered_backlog_tracker::replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) {
|
||||
for (auto& sst : old_ssts) {
|
||||
if (sst->data_size() > 0) {
|
||||
auto erased = tmp_all.erase(sst);
|
||||
if (erased) {
|
||||
tmp_total_bytes -= sst->data_size();
|
||||
}
|
||||
_total_bytes -= sst->data_size();
|
||||
_all.erase(sst);
|
||||
}
|
||||
}
|
||||
for (auto& sst : new_ssts) {
|
||||
if (sst->data_size() > 0) {
|
||||
auto [_, inserted] = tmp_all.insert(sst);
|
||||
if (inserted) {
|
||||
tmp_total_bytes += sst->data_size();
|
||||
}
|
||||
_total_bytes += sst->data_size();
|
||||
_all.insert(std::move(sst));
|
||||
}
|
||||
}
|
||||
auto tmp_contrib = calculate_sstables_backlog_contribution(boost::copy_range<std::vector<shared_sstable>>(tmp_all), _stcs_options);
|
||||
|
||||
std::invoke([&] () noexcept {
|
||||
_all = std::move(tmp_all);
|
||||
_total_bytes = tmp_total_bytes;
|
||||
_contrib = std::move(tmp_contrib);
|
||||
});
|
||||
refresh_sstables_backlog_contribution();
|
||||
}
|
||||
|
||||
namespace sstables {
|
||||
@@ -361,25 +265,23 @@ public:
|
||||
return b;
|
||||
}
|
||||
|
||||
// Provides strong exception safety guarantees
|
||||
virtual void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts) override {
|
||||
virtual void replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) override {
|
||||
struct replacement {
|
||||
std::vector<sstables::shared_sstable> old_ssts;
|
||||
std::vector<sstables::shared_sstable> new_ssts;
|
||||
};
|
||||
std::unordered_map<api::timestamp_type, replacement> per_window_replacement;
|
||||
auto tmp_windows = _windows;
|
||||
|
||||
for (auto& sst : new_ssts) {
|
||||
auto bound = lower_bound_of(sst->get_stats_metadata().max_timestamp);
|
||||
if (!tmp_windows.contains(bound)) {
|
||||
tmp_windows.emplace(bound, size_tiered_backlog_tracker(_stcs_options));
|
||||
if (!_windows.contains(bound)) {
|
||||
_windows.emplace(bound, size_tiered_backlog_tracker(_stcs_options));
|
||||
}
|
||||
per_window_replacement[bound].new_ssts.push_back(std::move(sst));
|
||||
}
|
||||
for (auto& sst : old_ssts) {
|
||||
auto bound = lower_bound_of(sst->get_stats_metadata().max_timestamp);
|
||||
if (tmp_windows.contains(bound)) {
|
||||
if (_windows.contains(bound)) {
|
||||
per_window_replacement[bound].old_ssts.push_back(std::move(sst));
|
||||
}
|
||||
}
|
||||
@@ -387,20 +289,12 @@ public:
|
||||
for (auto& [bound, r] : per_window_replacement) {
|
||||
// All windows must exist here, as windows are created for new files and will
|
||||
// remain alive as long as there's a single file in them
|
||||
auto it = tmp_windows.find(bound);
|
||||
if (it == tmp_windows.end()) {
|
||||
on_internal_error(clogger, fmt::format("window for bound {} not found", bound));
|
||||
}
|
||||
auto& w = it->second;
|
||||
w.replace_sstables(r.old_ssts, r.new_ssts);
|
||||
auto& w = _windows.at(bound);
|
||||
w.replace_sstables(std::move(r.old_ssts), std::move(r.new_ssts));
|
||||
if (w.total_bytes() <= 0) {
|
||||
tmp_windows.erase(bound);
|
||||
_windows.erase(bound);
|
||||
}
|
||||
}
|
||||
|
||||
std::invoke([&] () noexcept {
|
||||
_windows = std::move(tmp_windows);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@@ -500,31 +394,25 @@ public:
|
||||
return b;
|
||||
}
|
||||
|
||||
// Provides strong exception safety guarantees
|
||||
virtual void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts) override {
|
||||
auto tmp_size_per_level = _size_per_level;
|
||||
virtual void replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) override {
|
||||
std::vector<sstables::shared_sstable> l0_old_ssts, l0_new_ssts;
|
||||
for (auto& sst : new_ssts) {
|
||||
auto level = sst->get_sstable_level();
|
||||
tmp_size_per_level[level] += sst->data_size();
|
||||
_size_per_level[level] += sst->data_size();
|
||||
if (level == 0) {
|
||||
l0_new_ssts.push_back(std::move(sst));
|
||||
}
|
||||
}
|
||||
for (auto& sst : old_ssts) {
|
||||
auto level = sst->get_sstable_level();
|
||||
tmp_size_per_level[level] -= sst->data_size();
|
||||
_size_per_level[level] -= sst->data_size();
|
||||
if (level == 0) {
|
||||
l0_old_ssts.push_back(std::move(sst));
|
||||
}
|
||||
}
|
||||
if (l0_old_ssts.size() || l0_new_ssts.size()) {
|
||||
// stcs replace_sstables guarantees strong exception safety
|
||||
_l0_scts.replace_sstables(std::move(l0_old_ssts), std::move(l0_new_ssts));
|
||||
}
|
||||
std::invoke([&] () noexcept {
|
||||
_size_per_level = std::move(tmp_size_per_level);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@@ -532,14 +420,14 @@ struct unimplemented_backlog_tracker final : public compaction_backlog_tracker::
|
||||
virtual double backlog(const compaction_backlog_tracker::ongoing_writes& ow, const compaction_backlog_tracker::ongoing_compactions& oc) const override {
|
||||
return compaction_controller::disable_backlog;
|
||||
}
|
||||
virtual void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts) override {}
|
||||
virtual void replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) override {}
|
||||
};
|
||||
|
||||
struct null_backlog_tracker final : public compaction_backlog_tracker::impl {
|
||||
virtual double backlog(const compaction_backlog_tracker::ongoing_writes& ow, const compaction_backlog_tracker::ongoing_compactions& oc) const override {
|
||||
return 0;
|
||||
}
|
||||
virtual void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts) override {}
|
||||
virtual void replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) override {}
|
||||
};
|
||||
|
||||
//
|
||||
@@ -548,7 +436,7 @@ struct null_backlog_tracker final : public compaction_backlog_tracker::impl {
|
||||
//
|
||||
class null_compaction_strategy : public compaction_strategy_impl {
|
||||
public:
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control) override {
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) override {
|
||||
return sstables::compaction_descriptor();
|
||||
}
|
||||
|
||||
@@ -572,20 +460,6 @@ leveled_compaction_strategy::leveled_compaction_strategy(const std::map<sstring,
|
||||
{
|
||||
}
|
||||
|
||||
// options is a map of compaction strategy options and their values.
|
||||
// unchecked_options is an analogical map from which already checked options are deleted.
|
||||
// This helps making sure that only allowed options are being set.
|
||||
void leveled_compaction_strategy::validate_options(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
size_tiered_compaction_strategy_options::validate(options, unchecked_options);
|
||||
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, SSTABLE_SIZE_OPTION);
|
||||
auto min_sstables_size = cql3::statements::property_definitions::to_long(SSTABLE_SIZE_OPTION, tmp_value, DEFAULT_MAX_SSTABLE_SIZE_IN_MB);
|
||||
if (min_sstables_size <= 0) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be positive", SSTABLE_SIZE_OPTION, min_sstables_size));
|
||||
}
|
||||
unchecked_options.erase(SSTABLE_SIZE_OPTION);
|
||||
}
|
||||
|
||||
std::unique_ptr<compaction_backlog_tracker::impl> leveled_compaction_strategy::make_backlog_tracker() const {
|
||||
return std::make_unique<leveled_compaction_backlog_tracker>(_max_sstable_size_in_mb, _stcs_options);
|
||||
}
|
||||
@@ -619,22 +493,201 @@ time_window_compaction_strategy::time_window_compaction_strategy(const std::map<
|
||||
_use_clustering_key_filter = true;
|
||||
}
|
||||
|
||||
// options is a map of compaction strategy options and their values.
|
||||
// unchecked_options is an analogical map from which already checked options are deleted.
|
||||
// This helps making sure that only allowed options are being set.
|
||||
void time_window_compaction_strategy::validate_options(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
time_window_compaction_strategy_options::validate(options, unchecked_options);
|
||||
size_tiered_compaction_strategy_options::validate(options, unchecked_options);
|
||||
}
|
||||
|
||||
std::unique_ptr<compaction_backlog_tracker::impl> time_window_compaction_strategy::make_backlog_tracker() const {
|
||||
return std::make_unique<time_window_backlog_tracker>(_options, _stcs_options);
|
||||
}
|
||||
|
||||
} // namespace sstables
|
||||
|
||||
std::vector<sstables::shared_sstable>
|
||||
date_tiered_manifest::get_next_sstables(table_state& table_s, std::vector<sstables::shared_sstable>& uncompacting, gc_clock::time_point compaction_time) {
|
||||
if (table_s.main_sstable_set().all()->empty()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Find fully expired SSTables. Those will be included no matter what.
|
||||
auto expired = table_s.fully_expired_sstables(uncompacting, compaction_time);
|
||||
|
||||
if (!expired.empty()) {
|
||||
auto is_expired = [&] (const sstables::shared_sstable& s) { return expired.contains(s); };
|
||||
uncompacting.erase(boost::remove_if(uncompacting, is_expired), uncompacting.end());
|
||||
}
|
||||
|
||||
auto compaction_candidates = get_next_non_expired_sstables(table_s, uncompacting, compaction_time);
|
||||
if (!expired.empty()) {
|
||||
compaction_candidates.insert(compaction_candidates.end(), expired.begin(), expired.end());
|
||||
}
|
||||
return compaction_candidates;
|
||||
}
|
||||
|
||||
// Estimates the number of compaction jobs pending for the table.
//
// All sstables of the main set are filtered by max_sstable_age and grouped into
// buckets. Every bucket that holds at least min_compaction_threshold sstables
// contributes ceil(bucket size / max_compaction_threshold) jobs to the estimate.
int64_t date_tiered_manifest::get_estimated_tasks(table_state& table_s) const {
    // Read the thresholds once; they do not change while the buckets are walked.
    const auto& table_schema = table_s.schema();
    const int min_threshold = table_schema->min_compaction_threshold();
    const int max_threshold = table_schema->max_compaction_threshold();

    // Fetch the sstable set a single time and use it both for computing "now"
    // and for building the candidate list.
    auto all_sstables = table_s.main_sstable_set().all();
    const int64_t now = get_now(all_sstables);

    std::vector<sstables::shared_sstable> sstables;
    sstables.reserve(all_sstables->size());
    for (auto& entry : *all_sstables) {
        sstables.push_back(entry);
    }

    // filter_old_sstables() takes its input by value, so hand the vector over
    // instead of copying it.
    auto candidates = filter_old_sstables(std::move(sstables), _options.max_sstable_age, now);
    auto buckets = get_buckets(create_sst_and_min_timestamp_pairs(candidates), _options.base_time, min_threshold, now);

    int64_t n = 0;
    for (auto& bucket : buckets) {
        if (bucket.size() >= size_t(min_threshold)) {
            n += std::ceil(double(bucket.size()) / max_threshold);
        }
    }
    return n;
}
|
||||
|
||||
std::vector<sstables::shared_sstable>
|
||||
date_tiered_manifest::get_next_non_expired_sstables(table_state& table_s, std::vector<sstables::shared_sstable>& non_expiring_sstables, gc_clock::time_point compaction_time) {
|
||||
int base = table_s.schema()->min_compaction_threshold();
|
||||
int64_t now = get_now(table_s.main_sstable_set().all());
|
||||
auto most_interesting = get_compaction_candidates(table_s, non_expiring_sstables, now, base);
|
||||
|
||||
return most_interesting;
|
||||
|
||||
// FIXME: implement functionality below that will look for a single sstable with worth dropping tombstone,
|
||||
// iff strategy didn't find anything to compact. So it's not essential.
|
||||
#if 0
|
||||
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
|
||||
// ratio is greater than threshold.
|
||||
|
||||
List<SSTableReader> sstablesWithTombstones = Lists.newArrayList();
|
||||
for (SSTableReader sstable : nonExpiringSSTables)
|
||||
{
|
||||
if (worthDroppingTombstones(sstable, gcBefore))
|
||||
sstablesWithTombstones.add(sstable);
|
||||
}
|
||||
if (sstablesWithTombstones.isEmpty())
|
||||
return Collections.emptyList();
|
||||
|
||||
return Collections.singletonList(Collections.min(sstablesWithTombstones, new SSTableReader.SizeComparator()));
|
||||
#endif
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable>
|
||||
date_tiered_manifest::get_compaction_candidates(table_state& table_s, std::vector<sstables::shared_sstable> candidate_sstables, int64_t now, int base) {
|
||||
int min_threshold = table_s.schema()->min_compaction_threshold();
|
||||
int max_threshold = table_s.schema()->max_compaction_threshold();
|
||||
auto candidates = filter_old_sstables(candidate_sstables, _options.max_sstable_age, now);
|
||||
|
||||
auto buckets = get_buckets(create_sst_and_min_timestamp_pairs(candidates), _options.base_time, base, now);
|
||||
|
||||
return newest_bucket(buckets, min_threshold, max_threshold, now, _options.base_time);
|
||||
}
|
||||
|
||||
// Returns the largest max_timestamp recorded in the stats metadata of the given
// sstables. The result never goes below 0: an empty set, or a set in which no
// sstable has a positive max_timestamp, yields 0.
int64_t date_tiered_manifest::get_now(lw_shared_ptr<const sstables::sstable_list> shared_set) {
    int64_t newest = 0;
    for (const auto& sst : *shared_set) {
        const int64_t ts = sst->get_stats_metadata().max_timestamp;
        if (ts > newest) {
            newest = ts;
        }
    }
    return newest;
}
|
||||
|
||||
std::vector<sstables::shared_sstable>
|
||||
date_tiered_manifest::filter_old_sstables(std::vector<sstables::shared_sstable> sstables, api::timestamp_type max_sstable_age, int64_t now) {
|
||||
if (max_sstable_age == 0) {
|
||||
return sstables;
|
||||
}
|
||||
int64_t cutoff = now - max_sstable_age;
|
||||
|
||||
std::erase_if(sstables, [cutoff] (auto& sst) {
|
||||
return sst->get_stats_metadata().max_timestamp < cutoff;
|
||||
});
|
||||
|
||||
return sstables;
|
||||
}
|
||||
|
||||
// Pairs every sstable with the min_timestamp from its stats metadata, preserving
// the order of the input vector.
std::vector<std::pair<sstables::shared_sstable,int64_t>>
date_tiered_manifest::create_sst_and_min_timestamp_pairs(const std::vector<sstables::shared_sstable>& sstables) {
    std::vector<std::pair<sstables::shared_sstable,int64_t>> pairs;
    pairs.reserve(sstables.size());
    for (const auto& sst : sstables) {
        const int64_t min_ts = sst->get_stats_metadata().min_timestamp;
        pairs.emplace_back(sst, min_ts);
    }
    return pairs;
}
|
||||
|
||||
// Builds the options from a user-supplied option map, falling back to the
// DEFAULT_* constants for anything that is not set.
//
// The maximum sstable age is read as a (possibly fractional) number of days,
// rounded to whole hours, and converted to the configured timestamp resolution.
// The base time is read as a number of seconds and converted the same way.
date_tiered_compaction_strategy_options::date_tiered_compaction_strategy_options(const std::map<sstring, sstring>& options) {
    using namespace cql3::statements;

    // Small helper so that each lookup below reads as a single step.
    auto lookup = [&options] (const sstring& key) {
        return sstables::compaction_strategy_impl::get_value(options, key);
    };

    // Resolution that the settings below are converted to.
    auto resolution = lookup(TIMESTAMP_RESOLUTION_KEY);
    auto target_unit = resolution ? resolution.value() : DEFAULT_TIMESTAMP_RESOLUTION;

    auto max_age_setting = lookup(MAX_SSTABLE_AGE_KEY);
    auto max_age_days = property_definitions::to_double(MAX_SSTABLE_AGE_KEY, max_age_setting, DEFAULT_MAX_SSTABLE_AGE_DAYS);
    int64_t max_age_hours = std::lround(max_age_days * 24);
    max_sstable_age = duration_conversor::convert(target_unit, std::chrono::hours(max_age_hours));

    auto base_time_setting = lookup(BASE_TIME_KEY);
    auto base_seconds = property_definitions::to_long(BASE_TIME_KEY, base_time_setting, DEFAULT_BASE_TIME_SECONDS);
    base_time = duration_conversor::convert(target_unit, std::chrono::seconds(base_seconds));
}
|
||||
|
||||
// Builds the options from the DEFAULT_* constants only, with both values
// expressed as microsecond counts.
date_tiered_compaction_strategy_options::date_tiered_compaction_strategy_options() {
    using std::chrono::duration_cast;
    using std::chrono::microseconds;

    // Default age is given in days; truncate to whole hours before converting.
    const auto default_age_hours = int64_t(DEFAULT_MAX_SSTABLE_AGE_DAYS * 24);
    const auto default_age = std::chrono::hours(default_age_hours);
    const auto default_base = std::chrono::seconds(DEFAULT_BASE_TIME_SECONDS);

    max_sstable_age = duration_cast<microseconds>(default_age).count();
    base_time = duration_cast<microseconds>(default_base).count();
}
|
||||
|
||||
namespace sstables {
|
||||
|
||||
// Constructs the strategy from the user-supplied option map and logs a
// deprecation warning, since users are being steered towards TWCS.
date_tiered_compaction_strategy::date_tiered_compaction_strategy(const std::map<sstring, sstring>& options)
    : compaction_strategy_impl(options)
    , _manifest(options)
{
    clogger.warn("DateTieredCompactionStrategy is deprecated. Usually cases for which it is used are better handled by TimeWindowCompactionStrategy."
            " Please change your compaction strategy to TWCS as DTCS will be retired in the near future");

    // Tombstone compaction stays off unless the user explicitly sets one of the
    // tombstone options, because:
    // - deletions shouldn't be used with DTCS; data is expected to go away through TTL.
    // - for time series workloads it's usually better to wait for a whole sstable to
    //   expire than to compact a single sstable once it is more than 20% (the default
    //   threshold) expired.
    // See CASSANDRA-9234 for more details.
    const bool tombstone_option_given = options.contains(TOMBSTONE_COMPACTION_INTERVAL_OPTION)
            || options.contains(TOMBSTONE_THRESHOLD_OPTION);
    if (tombstone_option_given) {
        date_tiered_manifest::logger.debug("Enabling tombstone compactions for DTCS");
    } else {
        _disable_tombstone_compaction = true;
        date_tiered_manifest::logger.debug("Disabling tombstone compactions for DTCS");
    }

    _use_clustering_key_filter = true;
}
|
||||
|
||||
// Picks the next compaction job among `candidates`.
//
// The manifest's selection is used when it is non-empty. Otherwise the strategy
// falls back to a single-sstable compaction: among the candidates that are worth
// dropping tombstones from, the one with the smallest min_timestamp is chosen.
// An empty descriptor is returned when neither path finds anything.
compaction_descriptor date_tiered_compaction_strategy::get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) {
    const auto compaction_time = gc_clock::now();

    // Note that the manifest may modify `candidates` in place.
    auto job_sstables = _manifest.get_next_sstables(table_s, candidates, compaction_time);
    if (!job_sstables.empty()) {
        date_tiered_manifest::logger.debug("datetiered: Compacting {} out of {} sstables", job_sstables.size(), candidates.size());
        return sstables::compaction_descriptor(std::move(job_sstables), service::get_local_compaction_priority());
    }

    // Discard sstables whose droppable tombstone ratio isn't greater than the defined threshold.
    std::erase_if(candidates, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
        return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
    });
    if (candidates.empty()) {
        return sstables::compaction_descriptor();
    }

    // Prefer the oldest sstable: its tombstones are less likely to shadow data in
    // other sstables, and it also tends to be relatively big.
    const auto by_min_timestamp = [] (auto& lhs, auto& rhs) {
        return lhs->get_stats_metadata().min_timestamp < rhs->get_stats_metadata().min_timestamp;
    };
    auto oldest = std::min_element(candidates.begin(), candidates.end(), by_min_timestamp);
    return sstables::compaction_descriptor({ *oldest }, service::get_local_compaction_priority());
}
|
||||
|
||||
std::unique_ptr<compaction_backlog_tracker::impl> date_tiered_compaction_strategy::make_backlog_tracker() const {
|
||||
return std::make_unique<unimplemented_backlog_tracker>();
|
||||
}
|
||||
|
||||
size_tiered_compaction_strategy::size_tiered_compaction_strategy(const std::map<sstring, sstring>& options)
|
||||
: compaction_strategy_impl(options)
|
||||
, _options(options)
|
||||
@@ -644,13 +697,6 @@ size_tiered_compaction_strategy::size_tiered_compaction_strategy(const size_tier
|
||||
: _options(options)
|
||||
{}
|
||||
|
||||
// options is a map of compaction strategy options and their values.
|
||||
// unchecked_options is an analogical map from which already checked options are deleted.
|
||||
// This helps making sure that only allowed options are being set.
|
||||
void size_tiered_compaction_strategy::validate_options(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
size_tiered_compaction_strategy_options::validate(options, unchecked_options);
|
||||
}
|
||||
|
||||
std::unique_ptr<compaction_backlog_tracker::impl> size_tiered_compaction_strategy::make_backlog_tracker() const {
|
||||
return std::make_unique<size_tiered_backlog_tracker>(_options);
|
||||
}
|
||||
@@ -667,8 +713,8 @@ compaction_strategy_type compaction_strategy::type() const {
|
||||
return _compaction_strategy_impl->type();
|
||||
}
|
||||
|
||||
compaction_descriptor compaction_strategy::get_sstables_for_compaction(table_state& table_s, strategy_control& control) {
|
||||
return _compaction_strategy_impl->get_sstables_for_compaction(table_s, control);
|
||||
compaction_descriptor compaction_strategy::get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) {
|
||||
return _compaction_strategy_impl->get_sstables_for_compaction(table_s, control, std::move(candidates));
|
||||
}
|
||||
|
||||
compaction_descriptor compaction_strategy::get_major_compaction_job(table_state& table_s, std::vector<sstables::shared_sstable> candidates) {
|
||||
@@ -700,8 +746,8 @@ compaction_backlog_tracker compaction_strategy::make_backlog_tracker() const {
|
||||
}
|
||||
|
||||
sstables::compaction_descriptor
|
||||
compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
||||
return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, mode);
|
||||
compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const {
|
||||
return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, iop, mode);
|
||||
}
|
||||
|
||||
uint64_t compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) const {
|
||||
@@ -729,6 +775,9 @@ compaction_strategy make_compaction_strategy(compaction_strategy_type strategy,
|
||||
case compaction_strategy_type::leveled:
|
||||
impl = ::make_shared<leveled_compaction_strategy>(options);
|
||||
break;
|
||||
case compaction_strategy_type::date_tiered:
|
||||
impl = ::make_shared<date_tiered_compaction_strategy>(options);
|
||||
break;
|
||||
case compaction_strategy_type::time_window:
|
||||
impl = ::make_shared<time_window_compaction_strategy>(options);
|
||||
break;
|
||||
@@ -747,6 +796,7 @@ compaction_strategy_state compaction_strategy_state::make(const compaction_strat
|
||||
switch (cs.type()) {
|
||||
case compaction_strategy_type::null:
|
||||
case compaction_strategy_type::size_tiered:
|
||||
case compaction_strategy_type::date_tiered:
|
||||
return compaction_strategy_state(default_empty_state{});
|
||||
case compaction_strategy_type::leveled:
|
||||
return compaction_strategy_state(leveled_compaction_strategy_state{});
|
||||
|
||||
@@ -44,7 +44,7 @@ public:
|
||||
compaction_strategy& operator=(compaction_strategy&&);
|
||||
|
||||
// Return a list of sstables to be compacted after applying the strategy.
|
||||
compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control);
|
||||
compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<shared_sstable> candidates);
|
||||
|
||||
compaction_descriptor get_major_compaction_job(table_state& table_s, std::vector<shared_sstable> candidates);
|
||||
|
||||
@@ -71,6 +71,8 @@ public:
|
||||
return "SizeTieredCompactionStrategy";
|
||||
case compaction_strategy_type::leveled:
|
||||
return "LeveledCompactionStrategy";
|
||||
case compaction_strategy_type::date_tiered:
|
||||
return "DateTieredCompactionStrategy";
|
||||
case compaction_strategy_type::time_window:
|
||||
return "TimeWindowCompactionStrategy";
|
||||
default:
|
||||
@@ -87,6 +89,8 @@ public:
|
||||
return compaction_strategy_type::size_tiered;
|
||||
} else if (short_name == "LeveledCompactionStrategy") {
|
||||
return compaction_strategy_type::leveled;
|
||||
} else if (short_name == "DateTieredCompactionStrategy") {
|
||||
return compaction_strategy_type::date_tiered;
|
||||
} else if (short_name == "TimeWindowCompactionStrategy") {
|
||||
return compaction_strategy_type::time_window;
|
||||
} else {
|
||||
@@ -122,7 +126,7 @@ public:
|
||||
//
|
||||
// The caller should also pass a maximum number of SSTables which is the maximum amount of
|
||||
// SSTables that can be added into a single job.
|
||||
compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const;
|
||||
compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const;
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -21,23 +21,20 @@ class sstable_set_impl;
|
||||
class resharding_descriptor;
|
||||
|
||||
class compaction_strategy_impl {
|
||||
public:
|
||||
static constexpr float DEFAULT_TOMBSTONE_THRESHOLD = 0.2f;
|
||||
// minimum interval needed to perform tombstone removal compaction in seconds, default 86400 or 1 day.
|
||||
static constexpr std::chrono::seconds DEFAULT_TOMBSTONE_COMPACTION_INTERVAL() { return std::chrono::seconds(86400); }
|
||||
static constexpr auto TOMBSTONE_THRESHOLD_OPTION = "tombstone_threshold";
|
||||
static constexpr auto TOMBSTONE_COMPACTION_INTERVAL_OPTION = "tombstone_compaction_interval";
|
||||
protected:
|
||||
const sstring TOMBSTONE_THRESHOLD_OPTION = "tombstone_threshold";
|
||||
const sstring TOMBSTONE_COMPACTION_INTERVAL_OPTION = "tombstone_compaction_interval";
|
||||
|
||||
bool _use_clustering_key_filter = false;
|
||||
bool _disable_tombstone_compaction = false;
|
||||
float _tombstone_threshold = DEFAULT_TOMBSTONE_THRESHOLD;
|
||||
db_clock::duration _tombstone_compaction_interval = DEFAULT_TOMBSTONE_COMPACTION_INTERVAL();
|
||||
public:
|
||||
static std::optional<sstring> get_value(const std::map<sstring, sstring>& options, const sstring& name);
|
||||
static void validate_min_max_threshold(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options);
|
||||
static void validate_options_for_strategy_type(const std::map<sstring, sstring>& options, sstables::compaction_strategy_type type);
|
||||
protected:
|
||||
static void validate_options(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options);
|
||||
compaction_strategy_impl() = default;
|
||||
explicit compaction_strategy_impl(const std::map<sstring, sstring>& options);
|
||||
static compaction_descriptor make_major_compaction_job(std::vector<sstables::shared_sstable> candidates,
|
||||
@@ -45,7 +42,7 @@ protected:
|
||||
uint64_t max_sstable_bytes = compaction_descriptor::default_max_sstable_bytes);
|
||||
public:
|
||||
virtual ~compaction_strategy_impl() {}
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control) = 0;
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) = 0;
|
||||
virtual compaction_descriptor get_major_compaction_job(table_state& table_s, std::vector<sstables::shared_sstable> candidates) {
|
||||
return make_major_compaction_job(std::move(candidates));
|
||||
}
|
||||
@@ -64,7 +61,7 @@ public:
|
||||
|
||||
// Check if a given sstable is entitled for tombstone compaction based on its
|
||||
// droppable tombstone histogram and gc_before.
|
||||
bool worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const table_state& t);
|
||||
bool worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const tombstone_gc_state& gc_state);
|
||||
|
||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const = 0;
|
||||
|
||||
@@ -76,6 +73,6 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const;
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ enum class compaction_strategy_type {
|
||||
null,
|
||||
size_tiered,
|
||||
leveled,
|
||||
date_tiered,
|
||||
time_window,
|
||||
};
|
||||
|
||||
|
||||
277
compaction/date_tiered_compaction_strategy.hh
Normal file
277
compaction/date_tiered_compaction_strategy.hh
Normal file
@@ -0,0 +1,277 @@
|
||||
/*
|
||||
* Copyright (C) 2016-present-2017 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <iterator>
|
||||
#include "sstables/sstables.hh"
|
||||
#include "compaction.hh"
|
||||
#include "timestamp.hh"
|
||||
#include "cql3/statements/property_definitions.hh"
|
||||
#include "compaction_strategy_impl.hh"
|
||||
|
||||
// Default maximum sstable age, in days (presumably the fallback for the
// "max_sstable_age_days" option; the options constructor is not defined here).
static constexpr double DEFAULT_MAX_SSTABLE_AGE_DAYS = 365.0;
// Default base time, in seconds (presumably the fallback for the
// "base_time_seconds" option; the options constructor is not defined here).
static constexpr int64_t DEFAULT_BASE_TIME_SECONDS = 60;
|
||||
|
||||
struct duration_conversor {
|
||||
// Convert given duration to TargetDuration and return value as timestamp.
|
||||
template <typename TargetDuration, typename SourceDuration>
|
||||
static api::timestamp_type convert(SourceDuration d) {
|
||||
return std::chrono::duration_cast<TargetDuration>(d).count();
|
||||
}
|
||||
|
||||
// Convert given duration to duration that is represented by the string
|
||||
// target_duration, and return value as timestamp.
|
||||
template <typename SourceDuration>
|
||||
static api::timestamp_type convert(const sstring& target_duration, SourceDuration d) {
|
||||
if (target_duration == "HOURS") {
|
||||
return convert<std::chrono::hours>(d);
|
||||
} else if (target_duration == "MICROSECONDS") {
|
||||
return convert<std::chrono::microseconds>(d);
|
||||
} else if (target_duration == "MILLISECONDS") {
|
||||
return convert<std::chrono::milliseconds>(d);
|
||||
} else if (target_duration == "MINUTES") {
|
||||
return convert<std::chrono::minutes>(d);
|
||||
} else if (target_duration == "NANOSECONDS") {
|
||||
return convert<std::chrono::nanoseconds>(d);
|
||||
} else if (target_duration == "SECONDS") {
|
||||
return convert<std::chrono::seconds>(d);
|
||||
} else {
|
||||
throw std::runtime_error(format("target duration {} is not available", target_duration));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class date_tiered_compaction_strategy_options {
|
||||
const sstring DEFAULT_TIMESTAMP_RESOLUTION = "MICROSECONDS";
|
||||
const sstring TIMESTAMP_RESOLUTION_KEY = "timestamp_resolution";
|
||||
const sstring MAX_SSTABLE_AGE_KEY = "max_sstable_age_days";
|
||||
const sstring BASE_TIME_KEY = "base_time_seconds";
|
||||
|
||||
api::timestamp_type max_sstable_age;
|
||||
api::timestamp_type base_time;
|
||||
public:
|
||||
date_tiered_compaction_strategy_options(const std::map<sstring, sstring>& options);
|
||||
|
||||
date_tiered_compaction_strategy_options();
|
||||
private:
|
||||
|
||||
friend class date_tiered_manifest;
|
||||
};
|
||||
|
||||
class date_tiered_manifest {
|
||||
date_tiered_compaction_strategy_options _options;
|
||||
public:
|
||||
static logging::logger logger;
|
||||
|
||||
date_tiered_manifest() = delete;
|
||||
|
||||
date_tiered_manifest(const std::map<sstring, sstring>& options)
|
||||
: _options(options) {}
|
||||
|
||||
std::vector<sstables::shared_sstable>
|
||||
get_next_sstables(table_state& table_s, std::vector<sstables::shared_sstable>& uncompacting, gc_clock::time_point compaction_time);
|
||||
|
||||
int64_t get_estimated_tasks(table_state& table_s) const;
|
||||
private:
|
||||
std::vector<sstables::shared_sstable>
|
||||
get_next_non_expired_sstables(table_state& table_s, std::vector<sstables::shared_sstable>& non_expiring_sstables, gc_clock::time_point compaction_time);
|
||||
|
||||
std::vector<sstables::shared_sstable>
|
||||
get_compaction_candidates(table_state& table_s, std::vector<sstables::shared_sstable> candidate_sstables, int64_t now, int base);
|
||||
|
||||
/**
|
||||
* Gets the timestamp that DateTieredCompactionStrategy considers to be the "current time".
|
||||
* @return the maximum timestamp across all SSTables.
|
||||
*/
|
||||
static int64_t get_now(lw_shared_ptr<const sstables::sstable_list> shared_set);
|
||||
|
||||
/**
|
||||
* Removes all sstables with max timestamp older than maxSSTableAge.
|
||||
* @return a list of sstables with the oldest sstables excluded
|
||||
*/
|
||||
static std::vector<sstables::shared_sstable>
|
||||
filter_old_sstables(std::vector<sstables::shared_sstable> sstables, api::timestamp_type max_sstable_age, int64_t now);
|
||||
|
||||
/**
|
||||
*
|
||||
* @param sstables
|
||||
* @return
|
||||
*/
|
||||
static std::vector<std::pair<sstables::shared_sstable,int64_t>>
|
||||
create_sst_and_min_timestamp_pairs(const std::vector<sstables::shared_sstable>& sstables);
|
||||
|
||||
/**
|
||||
* A target time span used for bucketing SSTables based on timestamps.
|
||||
*/
|
||||
struct target {
|
||||
// How big a range of timestamps fit inside the target.
|
||||
int64_t size;
|
||||
// A timestamp t hits the target iff t / size == divPosition.
|
||||
int64_t div_position;
|
||||
|
||||
target() = delete;
|
||||
target(int64_t size, int64_t div_position) : size(size), div_position(div_position) {}
|
||||
|
||||
/**
|
||||
* Compares the target to a timestamp.
|
||||
* @param timestamp the timestamp to compare.
|
||||
* @return a negative integer, zero, or a positive integer as the target lies before, covering, or after than the timestamp.
|
||||
*/
|
||||
int compare_to_timestamp(int64_t timestamp) {
|
||||
auto ts1 = div_position;
|
||||
auto ts2 = timestamp / size;
|
||||
return (ts1 > ts2 ? 1 : (ts1 == ts2 ? 0 : -1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells if the timestamp hits the target.
|
||||
* @param timestamp the timestamp to test.
|
||||
* @return <code>true</code> iff timestamp / size == divPosition.
|
||||
*/
|
||||
bool on_target(int64_t timestamp) {
|
||||
return compare_to_timestamp(timestamp) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next target, which represents an earlier time span.
|
||||
* @param base The number of contiguous targets that will have the same size. Targets following those will be <code>base</code> times as big.
|
||||
* @return
|
||||
*/
|
||||
target next_target(int base)
|
||||
{
|
||||
if (div_position % base > 0) {
|
||||
return target(size, div_position - 1);
|
||||
} else {
|
||||
return target(size * base, div_position / base - 1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Group files with similar min timestamp into buckets. Files with recent min timestamps are grouped together into
|
||||
* buckets designated to short timespans while files with older timestamps are grouped into buckets representing
|
||||
* longer timespans.
|
||||
* @param files pairs consisting of a file and its min timestamp
|
||||
* @param timeUnit
|
||||
* @param base
|
||||
* @param now
|
||||
* @return a list of buckets of files. The list is ordered such that the files with newest timestamps come first.
|
||||
* Each bucket is also a list of files ordered from newest to oldest.
|
||||
*/
|
||||
std::vector<std::vector<sstables::shared_sstable>>
|
||||
get_buckets(std::vector<std::pair<sstables::shared_sstable,int64_t>>&& files, api::timestamp_type time_unit, int base, int64_t now) const {
|
||||
// Sort files by age. Newest first.
|
||||
std::sort(files.begin(), files.end(), [] (auto& i, auto& j) {
|
||||
return i.second > j.second;
|
||||
});
|
||||
|
||||
std::vector<std::vector<sstables::shared_sstable>> buckets;
|
||||
auto target = get_initial_target(now, time_unit);
|
||||
auto it = files.begin();
|
||||
|
||||
while (it != files.end()) {
|
||||
bool finish = false;
|
||||
while (!target.on_target(it->second)) {
|
||||
// If the file is too new for the target, skip it.
|
||||
if (target.compare_to_timestamp(it->second) < 0) {
|
||||
it++;
|
||||
if (it == files.end()) {
|
||||
finish = true;
|
||||
break;
|
||||
}
|
||||
} else { // If the file is too old for the target, switch targets.
|
||||
target = target.next_target(base);
|
||||
}
|
||||
}
|
||||
if (finish) {
|
||||
break;
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> bucket;
|
||||
while (target.on_target(it->second)) {
|
||||
bucket.push_back(it->first);
|
||||
it++;
|
||||
if (it == files.end()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
buckets.push_back(bucket);
|
||||
}
|
||||
|
||||
return buckets;
|
||||
}
|
||||
|
||||
target get_initial_target(uint64_t now, int64_t time_unit) const {
|
||||
return target(time_unit, now / time_unit);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param buckets list of buckets, sorted from newest to oldest, from which to return the newest bucket within thresholds.
|
||||
* @param minThreshold minimum number of sstables in a bucket to qualify.
|
||||
* @param maxThreshold maximum number of sstables to compact at once (the returned bucket will be trimmed down to this).
|
||||
* @return a bucket (list) of sstables to compact.
|
||||
*/
|
||||
std::vector<sstables::shared_sstable>
|
||||
newest_bucket(std::vector<std::vector<sstables::shared_sstable>>& buckets, int min_threshold, int max_threshold,
|
||||
int64_t now, api::timestamp_type base_time) {
|
||||
|
||||
// If the "incoming window" has at least minThreshold SSTables, choose that one.
|
||||
// For any other bucket, at least 2 SSTables is enough.
|
||||
// In any case, limit to maxThreshold SSTables.
|
||||
target incoming_window = get_initial_target(now, base_time);
|
||||
for (auto& bucket : buckets) {
|
||||
auto min_timestamp = bucket.front()->get_stats_metadata().min_timestamp;
|
||||
if (bucket.size() >= size_t(min_threshold) ||
|
||||
(bucket.size() >= 2 && !incoming_window.on_target(min_timestamp))) {
|
||||
trim_to_threshold(bucket, max_threshold);
|
||||
return bucket;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param bucket list of sstables, ordered from newest to oldest by getMinTimestamp().
|
||||
* @param maxThreshold maximum number of sstables in a single compaction task.
|
||||
* @return A bucket trimmed to the <code>maxThreshold</code> newest sstables.
|
||||
*/
|
||||
static void trim_to_threshold(std::vector<sstables::shared_sstable>& bucket, int max_threshold) {
|
||||
// Trim the oldest sstables off the end to meet the maxThreshold
|
||||
bucket.resize(std::min(bucket.size(), size_t(max_threshold)));
|
||||
}
|
||||
};
|
||||
|
||||
namespace sstables {
|
||||
|
||||
class date_tiered_compaction_strategy : public compaction_strategy_impl {
|
||||
date_tiered_manifest _manifest;
|
||||
public:
|
||||
date_tiered_compaction_strategy(const std::map<sstring, sstring>& options);
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) override;
|
||||
|
||||
virtual int64_t estimated_pending_compactions(table_state& table_s) const override {
|
||||
return _manifest.get_estimated_tasks(table_s);
|
||||
}
|
||||
|
||||
virtual compaction_strategy_type type() const override {
|
||||
return compaction_strategy_type::date_tiered;
|
||||
}
|
||||
|
||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -19,9 +19,8 @@ leveled_compaction_strategy_state& leveled_compaction_strategy::get_state(table_
|
||||
return table_s.get_compaction_strategy_state().get<leveled_compaction_strategy_state>();
|
||||
}
|
||||
|
||||
compaction_descriptor leveled_compaction_strategy::get_sstables_for_compaction(table_state& table_s, strategy_control& control) {
|
||||
compaction_descriptor leveled_compaction_strategy::get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) {
|
||||
auto& state = get_state(table_s);
|
||||
auto candidates = control.candidates(table_s);
|
||||
// NOTE: leveled_manifest creation may be slightly expensive, so later on,
|
||||
// we may want to store it in the strategy itself. However, the sstable
|
||||
// lists managed by the manifest may become outdated. For example, one
|
||||
@@ -51,18 +50,18 @@ compaction_descriptor leveled_compaction_strategy::get_sstables_for_compaction(t
|
||||
auto& sstables = manifest.get_level(level);
|
||||
// filter out sstables which droppable tombstone ratio isn't greater than the defined threshold.
|
||||
auto e = boost::range::remove_if(sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
|
||||
return !worth_dropping_tombstones(sst, compaction_time, table_s);
|
||||
return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
|
||||
});
|
||||
sstables.erase(e, sstables.end());
|
||||
if (sstables.empty()) {
|
||||
continue;
|
||||
}
|
||||
auto& sst = *std::max_element(sstables.begin(), sstables.end(), [&] (auto& i, auto& j) {
|
||||
auto gc_before1 = i->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
|
||||
auto gc_before2 = j->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
|
||||
auto gc_before1 = i->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state());
|
||||
auto gc_before2 = j->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state());
|
||||
return i->estimate_droppable_tombstone_ratio(gc_before1) < j->estimate_droppable_tombstone_ratio(gc_before2);
|
||||
});
|
||||
return sstables::compaction_descriptor({ sst }, sst->get_sstable_level());
|
||||
return sstables::compaction_descriptor({ sst }, service::get_local_compaction_priority(), sst->get_sstable_level());
|
||||
}
|
||||
return {};
|
||||
}
|
||||
@@ -146,7 +145,7 @@ int64_t leveled_compaction_strategy::estimated_pending_compactions(table_state&
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
||||
leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const {
|
||||
std::array<std::vector<shared_sstable>, leveled_manifest::MAX_LEVELS> level_info;
|
||||
|
||||
auto is_disjoint = [schema] (const std::vector<shared_sstable>& sstables, unsigned tolerance) -> std::tuple<bool, unsigned> {
|
||||
@@ -156,8 +155,6 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
|
||||
|
||||
auto max_sstable_size_in_bytes = _max_sstable_size_in_mb * 1024 * 1024;
|
||||
|
||||
clogger.debug("get_reshaping_job: mode={} input.size={} max_sstable_size_in_bytes={}", mode == reshape_mode::relaxed ? "relaxed" : "strict", input.size(), max_sstable_size_in_bytes);
|
||||
|
||||
for (auto& sst : input) {
|
||||
auto sst_level = sst->get_sstable_level();
|
||||
if (sst_level > leveled_manifest::MAX_LEVELS - 1) {
|
||||
@@ -165,7 +162,7 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
|
||||
|
||||
// This is really unexpected, so we'll just compact it all to fix it
|
||||
auto ideal_level = ideal_level_for_input(input, max_sstable_size_in_bytes);
|
||||
compaction_descriptor desc(std::move(input), ideal_level, max_sstable_size_in_bytes);
|
||||
compaction_descriptor desc(std::move(input), iop, ideal_level, max_sstable_size_in_bytes);
|
||||
desc.options = compaction_type_options::make_reshape();
|
||||
return desc;
|
||||
}
|
||||
@@ -196,14 +193,14 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
|
||||
unsigned ideal_level = ideal_level_for_input(level_info[0], max_sstable_size_in_bytes);
|
||||
|
||||
leveled_manifest::logger.info("Reshaping {} disjoint sstables in level 0 into level {}", level_info[0].size(), ideal_level);
|
||||
compaction_descriptor desc(std::move(input), ideal_level, max_sstable_size_in_bytes);
|
||||
compaction_descriptor desc(std::move(input), iop, ideal_level, max_sstable_size_in_bytes);
|
||||
desc.options = compaction_type_options::make_reshape();
|
||||
return desc;
|
||||
}
|
||||
|
||||
if (level_info[0].size() > offstrategy_threshold) {
|
||||
size_tiered_compaction_strategy stcs(_stcs_options);
|
||||
return stcs.get_reshaping_job(std::move(level_info[0]), schema, mode);
|
||||
return stcs.get_reshaping_job(std::move(level_info[0]), schema, iop, mode);
|
||||
}
|
||||
|
||||
for (unsigned level = leveled_manifest::MAX_LEVELS - 1; level > 0; --level) {
|
||||
@@ -214,7 +211,7 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
|
||||
auto [disjoint, overlapping_sstables] = is_disjoint(level_info[level], tolerance(level));
|
||||
if (!disjoint) {
|
||||
leveled_manifest::logger.warn("Turns out that level {} is not disjoint, found {} overlapping SSTables, so the level will be entirely compacted on behalf of {}.{}", level, overlapping_sstables, schema->ks_name(), schema->cf_name());
|
||||
compaction_descriptor desc(std::move(level_info[level]), level, max_sstable_size_in_bytes);
|
||||
compaction_descriptor desc(std::move(level_info[level]), iop, level, max_sstable_size_in_bytes);
|
||||
desc.options = compaction_type_options::make_reshape();
|
||||
return desc;
|
||||
}
|
||||
@@ -234,15 +231,12 @@ leveled_compaction_strategy::get_cleanup_compaction_jobs(table_state& table_s, s
|
||||
if (levels[level].empty()) {
|
||||
continue;
|
||||
}
|
||||
ret.push_back(compaction_descriptor(std::move(levels[level]), level, _max_sstable_size_in_mb * 1024 * 1024));
|
||||
ret.push_back(compaction_descriptor(std::move(levels[level]), service::get_local_compaction_priority(), level, _max_sstable_size_in_mb * 1024 * 1024));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned leveled_compaction_strategy::ideal_level_for_input(const std::vector<sstables::shared_sstable>& input, uint64_t max_sstable_size) {
|
||||
if (!max_sstable_size) {
|
||||
return 1;
|
||||
}
|
||||
auto log_fanout = [fanout = leveled_manifest::leveled_fan_out] (double x) {
|
||||
double inv_log_fanout = 1.0f / std::log(fanout);
|
||||
return log(x) * inv_log_fanout;
|
||||
|
||||
@@ -36,7 +36,7 @@ struct leveled_compaction_strategy_state {
|
||||
|
||||
class leveled_compaction_strategy : public compaction_strategy_impl {
|
||||
static constexpr int32_t DEFAULT_MAX_SSTABLE_SIZE_IN_MB = 160;
|
||||
static constexpr auto SSTABLE_SIZE_OPTION = "sstable_size_in_mb";
|
||||
const sstring SSTABLE_SIZE_OPTION = "sstable_size_in_mb";
|
||||
|
||||
int32_t _max_sstable_size_in_mb = DEFAULT_MAX_SSTABLE_SIZE_IN_MB;
|
||||
size_tiered_compaction_strategy_options _stcs_options;
|
||||
@@ -46,10 +46,9 @@ private:
|
||||
leveled_compaction_strategy_state& get_state(table_state& table_s) const;
|
||||
public:
|
||||
static unsigned ideal_level_for_input(const std::vector<sstables::shared_sstable>& input, uint64_t max_sstable_size);
|
||||
static void validate_options(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options);
|
||||
|
||||
leveled_compaction_strategy(const std::map<sstring, sstring>& options);
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control) override;
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) override;
|
||||
|
||||
virtual std::vector<compaction_descriptor> get_cleanup_compaction_jobs(table_state& table_s, std::vector<shared_sstable> candidates) const override;
|
||||
|
||||
@@ -74,7 +73,7 @@ public:
|
||||
|
||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
||||
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -11,11 +11,13 @@
|
||||
#pragma once
|
||||
|
||||
#include "sstables/sstables.hh"
|
||||
#include "compaction.hh"
|
||||
#include "size_tiered_compaction_strategy.hh"
|
||||
#include "range.hh"
|
||||
#include "log.hh"
|
||||
#include <boost/range/algorithm/sort.hpp>
|
||||
#include <boost/range/algorithm/partial_sort.hpp>
|
||||
#include "service/priority_manager.hh"
|
||||
|
||||
class leveled_manifest {
|
||||
table_state& _table_s;
|
||||
@@ -147,7 +149,8 @@ public:
|
||||
if (info.can_promote) {
|
||||
info.candidates = get_overlapping_starved_sstables(next_level, std::move(info.candidates), compaction_counter);
|
||||
}
|
||||
return sstables::compaction_descriptor(std::move(info.candidates), next_level, _max_sstable_size_in_bytes);
|
||||
return sstables::compaction_descriptor(std::move(info.candidates),
|
||||
service::get_local_compaction_priority(), next_level, _max_sstable_size_in_bytes);
|
||||
} else {
|
||||
logger.debug("No compaction candidates for L{}", level);
|
||||
return sstables::compaction_descriptor();
|
||||
@@ -211,7 +214,8 @@ public:
|
||||
_table_s.min_compaction_threshold(), _schema->max_compaction_threshold(), _stcs_options);
|
||||
if (!most_interesting.empty()) {
|
||||
logger.debug("L0 is too far behind, performing size-tiering there first");
|
||||
return sstables::compaction_descriptor(std::move(most_interesting));
|
||||
return sstables::compaction_descriptor(std::move(most_interesting),
|
||||
service::get_local_compaction_priority());
|
||||
}
|
||||
}
|
||||
auto descriptor = get_descriptor_for_level(i, last_compacted_keys, compaction_counter);
|
||||
@@ -225,7 +229,8 @@ public:
|
||||
auto info = get_candidates_for(0, last_compacted_keys);
|
||||
if (!info.candidates.empty()) {
|
||||
auto next_level = get_next_level(info.candidates, info.can_promote);
|
||||
return sstables::compaction_descriptor(std::move(info.candidates), next_level, _max_sstable_size_in_bytes);
|
||||
return sstables::compaction_descriptor(std::move(info.candidates),
|
||||
service::get_local_compaction_priority(), next_level, _max_sstable_size_in_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "compaction_backlog_manager.hh"
|
||||
#include "size_tiered_compaction_strategy.hh"
|
||||
#include <cmath>
|
||||
#include <ctgmath>
|
||||
|
||||
// Backlog for one SSTable under STCS:
|
||||
//
|
||||
@@ -63,14 +64,10 @@
|
||||
// certain point in time, whose size is the amount of bytes currently written. So all we need
|
||||
// to do is keep track of them too, and add the current estimate to the static part of (4).
|
||||
class size_tiered_backlog_tracker final : public compaction_backlog_tracker::impl {
|
||||
struct sstables_backlog_contribution {
|
||||
double value = 0.0f;
|
||||
std::unordered_set<sstables::shared_sstable> sstables;
|
||||
};
|
||||
|
||||
sstables::size_tiered_compaction_strategy_options _stcs_options;
|
||||
int64_t _total_bytes = 0;
|
||||
sstables_backlog_contribution _contrib;
|
||||
double _sstables_backlog_contribution = 0.0f;
|
||||
std::unordered_set<sstables::shared_sstable> _sstables_contributing_backlog;
|
||||
std::unordered_set<sstables::shared_sstable> _all;
|
||||
|
||||
struct inflight_component {
|
||||
@@ -80,12 +77,12 @@ class size_tiered_backlog_tracker final : public compaction_backlog_tracker::imp
|
||||
|
||||
inflight_component compacted_backlog(const compaction_backlog_tracker::ongoing_compactions& ongoing_compactions) const;
|
||||
|
||||
static double log4(double x) {
|
||||
double log4(double x) const {
|
||||
double inv_log_4 = 1.0f / std::log(4);
|
||||
return log(x) * inv_log_4;
|
||||
}
|
||||
|
||||
static sstables_backlog_contribution calculate_sstables_backlog_contribution(const std::vector<sstables::shared_sstable>& all, const sstables::size_tiered_compaction_strategy_options& stcs_options);
|
||||
void refresh_sstables_backlog_contribution();
|
||||
public:
|
||||
size_tiered_backlog_tracker(sstables::size_tiered_compaction_strategy_options stcs_options) : _stcs_options(stcs_options) {}
|
||||
|
||||
@@ -93,8 +90,7 @@ public:
|
||||
|
||||
// Removing could be the result of a failure of an in progress write, successful finish of a
|
||||
// compaction, or some one-off operation, like drop
|
||||
// Provides strong exception safety guarantees.
|
||||
virtual void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts) override;
|
||||
virtual void replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) override;
|
||||
|
||||
int64_t total_bytes() const {
|
||||
return _total_bytes;
|
||||
|
||||
@@ -15,73 +15,20 @@
|
||||
|
||||
namespace sstables {
|
||||
|
||||
static long validate_sstable_size(const std::map<sstring, sstring>& options) {
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, size_tiered_compaction_strategy_options::MIN_SSTABLE_SIZE_KEY);
|
||||
auto min_sstables_size = cql3::statements::property_definitions::to_long(size_tiered_compaction_strategy_options::MIN_SSTABLE_SIZE_KEY, tmp_value, size_tiered_compaction_strategy_options::DEFAULT_MIN_SSTABLE_SIZE);
|
||||
if (min_sstables_size < 0) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be non negative", size_tiered_compaction_strategy_options::MIN_SSTABLE_SIZE_KEY, min_sstables_size));
|
||||
}
|
||||
return min_sstables_size;
|
||||
}
|
||||
|
||||
static long validate_sstable_size(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
auto min_sstables_size = validate_sstable_size(options);
|
||||
unchecked_options.erase(size_tiered_compaction_strategy_options::MIN_SSTABLE_SIZE_KEY);
|
||||
return min_sstables_size;
|
||||
}
|
||||
|
||||
static double validate_bucket_low(const std::map<sstring, sstring>& options) {
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, size_tiered_compaction_strategy_options::BUCKET_LOW_KEY);
|
||||
auto bucket_low = cql3::statements::property_definitions::to_double(size_tiered_compaction_strategy_options::BUCKET_LOW_KEY, tmp_value, size_tiered_compaction_strategy_options::DEFAULT_BUCKET_LOW);
|
||||
if (bucket_low <= 0.0 || bucket_low >= 1.0) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be between 0.0 and 1.0", size_tiered_compaction_strategy_options::BUCKET_LOW_KEY, bucket_low));
|
||||
}
|
||||
return bucket_low;
|
||||
}
|
||||
|
||||
static double validate_bucket_low(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
auto bucket_low = validate_bucket_low(options);
|
||||
unchecked_options.erase(size_tiered_compaction_strategy_options::BUCKET_LOW_KEY);
|
||||
return bucket_low;
|
||||
}
|
||||
|
||||
static double validate_bucket_high(const std::map<sstring, sstring>& options) {
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, size_tiered_compaction_strategy_options::BUCKET_HIGH_KEY);
|
||||
auto bucket_high = cql3::statements::property_definitions::to_double(size_tiered_compaction_strategy_options::BUCKET_HIGH_KEY, tmp_value, size_tiered_compaction_strategy_options::DEFAULT_BUCKET_HIGH);
|
||||
if (bucket_high <= 1.0) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be greater than 1.0", size_tiered_compaction_strategy_options::BUCKET_HIGH_KEY, bucket_high));
|
||||
}
|
||||
return bucket_high;
|
||||
}
|
||||
|
||||
static double validate_bucket_high(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
auto bucket_high = validate_bucket_high(options);
|
||||
unchecked_options.erase(size_tiered_compaction_strategy_options::BUCKET_HIGH_KEY);
|
||||
return bucket_high;
|
||||
}
|
||||
|
||||
static double validate_cold_reads_to_omit(const std::map<sstring, sstring>& options) {
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, size_tiered_compaction_strategy_options::COLD_READS_TO_OMIT_KEY);
|
||||
auto cold_reads_to_omit = cql3::statements::property_definitions::to_double(size_tiered_compaction_strategy_options::COLD_READS_TO_OMIT_KEY, tmp_value, size_tiered_compaction_strategy_options::DEFAULT_COLD_READS_TO_OMIT);
|
||||
if (cold_reads_to_omit < 0.0 || cold_reads_to_omit > 1.0) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be between 0.0 and 1.0", size_tiered_compaction_strategy_options::COLD_READS_TO_OMIT_KEY, cold_reads_to_omit));
|
||||
}
|
||||
return cold_reads_to_omit;
|
||||
}
|
||||
|
||||
static double validate_cold_reads_to_omit(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
auto cold_reads_to_omit = validate_cold_reads_to_omit(options);
|
||||
unchecked_options.erase(size_tiered_compaction_strategy_options::COLD_READS_TO_OMIT_KEY);
|
||||
return cold_reads_to_omit;
|
||||
}
|
||||
|
||||
size_tiered_compaction_strategy_options::size_tiered_compaction_strategy_options(const std::map<sstring, sstring>& options) {
|
||||
using namespace cql3::statements;
|
||||
|
||||
min_sstable_size = validate_sstable_size(options);
|
||||
bucket_low = validate_bucket_low(options);
|
||||
bucket_high = validate_bucket_high(options);
|
||||
cold_reads_to_omit = validate_cold_reads_to_omit(options);
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, MIN_SSTABLE_SIZE_KEY);
|
||||
min_sstable_size = property_definitions::to_long(MIN_SSTABLE_SIZE_KEY, tmp_value, DEFAULT_MIN_SSTABLE_SIZE);
|
||||
|
||||
tmp_value = compaction_strategy_impl::get_value(options, BUCKET_LOW_KEY);
|
||||
bucket_low = property_definitions::to_double(BUCKET_LOW_KEY, tmp_value, DEFAULT_BUCKET_LOW);
|
||||
|
||||
tmp_value = compaction_strategy_impl::get_value(options, BUCKET_HIGH_KEY);
|
||||
bucket_high = property_definitions::to_double(BUCKET_HIGH_KEY, tmp_value, DEFAULT_BUCKET_HIGH);
|
||||
|
||||
tmp_value = compaction_strategy_impl::get_value(options, COLD_READS_TO_OMIT_KEY);
|
||||
cold_reads_to_omit = property_definitions::to_double(COLD_READS_TO_OMIT_KEY, tmp_value, DEFAULT_COLD_READS_TO_OMIT);
|
||||
}
|
||||
|
||||
size_tiered_compaction_strategy_options::size_tiered_compaction_strategy_options() {
|
||||
@@ -91,20 +38,6 @@ size_tiered_compaction_strategy_options::size_tiered_compaction_strategy_options
|
||||
cold_reads_to_omit = DEFAULT_COLD_READS_TO_OMIT;
|
||||
}
|
||||
|
||||
// options is a map of compaction strategy options and their values.
|
||||
// unchecked_options is an analogical map from which already checked options are deleted.
|
||||
// This helps making sure that only allowed options are being set.
|
||||
void size_tiered_compaction_strategy_options::validate(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
validate_sstable_size(options, unchecked_options);
|
||||
auto bucket_low = validate_bucket_low(options, unchecked_options);
|
||||
auto bucket_high = validate_bucket_high(options, unchecked_options);
|
||||
if (bucket_high <= bucket_low) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) is less than or equal to the {} value ({})", BUCKET_HIGH_KEY, bucket_high, BUCKET_LOW_KEY, bucket_low));
|
||||
}
|
||||
validate_cold_reads_to_omit(options, unchecked_options);
|
||||
compaction_strategy_impl::validate_min_max_threshold(options, unchecked_options);
|
||||
}
|
||||
|
||||
std::vector<std::pair<sstables::shared_sstable, uint64_t>>
|
||||
size_tiered_compaction_strategy::create_sstable_and_length_pairs(const std::vector<sstables::shared_sstable>& sstables) {
|
||||
|
||||
@@ -210,12 +143,11 @@ size_tiered_compaction_strategy::most_interesting_bucket(std::vector<std::vector
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
size_tiered_compaction_strategy::get_sstables_for_compaction(table_state& table_s, strategy_control& control) {
|
||||
size_tiered_compaction_strategy::get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) {
|
||||
// make local copies so they can't be changed out from under us mid-method
|
||||
int min_threshold = table_s.min_compaction_threshold();
|
||||
int max_threshold = table_s.schema()->max_compaction_threshold();
|
||||
auto compaction_time = gc_clock::now();
|
||||
auto candidates = control.candidates(table_s);
|
||||
|
||||
// TODO: Add support to filter cold sstables (for reference: SizeTieredCompactionStrategy::filterColdSSTables).
|
||||
|
||||
@@ -223,13 +155,13 @@ size_tiered_compaction_strategy::get_sstables_for_compaction(table_state& table_
|
||||
|
||||
if (is_any_bucket_interesting(buckets, min_threshold)) {
|
||||
std::vector<sstables::shared_sstable> most_interesting = most_interesting_bucket(std::move(buckets), min_threshold, max_threshold);
|
||||
return sstables::compaction_descriptor(std::move(most_interesting));
|
||||
return sstables::compaction_descriptor(std::move(most_interesting), service::get_local_compaction_priority());
|
||||
}
|
||||
|
||||
// If we are not enforcing min_threshold explicitly, try any pair of SStables in the same tier.
|
||||
if (!table_s.compaction_enforce_min_threshold() && is_any_bucket_interesting(buckets, 2)) {
|
||||
std::vector<sstables::shared_sstable> most_interesting = most_interesting_bucket(std::move(buckets), 2, max_threshold);
|
||||
return sstables::compaction_descriptor(std::move(most_interesting));
|
||||
return sstables::compaction_descriptor(std::move(most_interesting), service::get_local_compaction_priority());
|
||||
}
|
||||
|
||||
if (!table_s.tombstone_gc_enabled()) {
|
||||
@@ -243,7 +175,7 @@ size_tiered_compaction_strategy::get_sstables_for_compaction(table_state& table_
|
||||
for (auto&& sstables : buckets | boost::adaptors::reversed) {
|
||||
// filter out sstables which droppable tombstone ratio isn't greater than the defined threshold.
|
||||
auto e = boost::range::remove_if(sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
|
||||
return !worth_dropping_tombstones(sst, compaction_time, table_s);
|
||||
return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
|
||||
});
|
||||
sstables.erase(e, sstables.end());
|
||||
if (sstables.empty()) {
|
||||
@@ -253,7 +185,7 @@ size_tiered_compaction_strategy::get_sstables_for_compaction(table_state& table_
|
||||
auto it = std::min_element(sstables.begin(), sstables.end(), [] (auto& i, auto& j) {
|
||||
return i->get_stats_metadata().min_timestamp < j->get_stats_metadata().min_timestamp;
|
||||
});
|
||||
return sstables::compaction_descriptor({ *it });
|
||||
return sstables::compaction_descriptor({ *it }, service::get_local_compaction_priority());
|
||||
}
|
||||
return sstables::compaction_descriptor();
|
||||
}
|
||||
@@ -297,7 +229,7 @@ size_tiered_compaction_strategy::most_interesting_bucket(const std::vector<sstab
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const
|
||||
size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const
|
||||
{
|
||||
size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
|
||||
size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
|
||||
@@ -313,7 +245,7 @@ size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
||||
// All sstables can be reshaped at once if the amount of overlapping will not cause memory usage to be high,
|
||||
// which is possible because partitioned set is able to incrementally open sstables during compaction
|
||||
if (sstable_set_overlapping_count(schema, input) <= max_sstables) {
|
||||
compaction_descriptor desc(std::move(input));
|
||||
compaction_descriptor desc(std::move(input), iop);
|
||||
desc.options = compaction_type_options::make_reshape();
|
||||
return desc;
|
||||
}
|
||||
@@ -329,7 +261,7 @@ size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
||||
});
|
||||
bucket.resize(max_sstables);
|
||||
}
|
||||
compaction_descriptor desc(std::move(bucket));
|
||||
compaction_descriptor desc(std::move(bucket), iop);
|
||||
desc.options = compaction_type_options::make_reshape();
|
||||
return desc;
|
||||
}
|
||||
@@ -357,7 +289,7 @@ size_tiered_compaction_strategy::get_cleanup_compaction_jobs(table_state& table_
|
||||
unsigned needed = std::min(remaining, max_threshold);
|
||||
std::vector<shared_sstable> sstables;
|
||||
std::move(it, it + needed, std::back_inserter(sstables));
|
||||
ret.push_back(compaction_descriptor(std::move(sstables)));
|
||||
ret.push_back(compaction_descriptor(std::move(sstables), service::get_local_compaction_priority()));
|
||||
std::advance(it, needed);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,16 +18,15 @@ class size_tiered_backlog_tracker;
|
||||
namespace sstables {
|
||||
|
||||
class size_tiered_compaction_strategy_options {
|
||||
public:
|
||||
static constexpr uint64_t DEFAULT_MIN_SSTABLE_SIZE = 50L * 1024L * 1024L;
|
||||
static constexpr double DEFAULT_BUCKET_LOW = 0.5;
|
||||
static constexpr double DEFAULT_BUCKET_HIGH = 1.5;
|
||||
static constexpr double DEFAULT_COLD_READS_TO_OMIT = 0.05;
|
||||
static constexpr auto MIN_SSTABLE_SIZE_KEY = "min_sstable_size";
|
||||
static constexpr auto BUCKET_LOW_KEY = "bucket_low";
|
||||
static constexpr auto BUCKET_HIGH_KEY = "bucket_high";
|
||||
static constexpr auto COLD_READS_TO_OMIT_KEY = "cold_reads_to_omit";
|
||||
private:
|
||||
const sstring MIN_SSTABLE_SIZE_KEY = "min_sstable_size";
|
||||
const sstring BUCKET_LOW_KEY = "bucket_low";
|
||||
const sstring BUCKET_HIGH_KEY = "bucket_high";
|
||||
const sstring COLD_READS_TO_OMIT_KEY = "cold_reads_to_omit";
|
||||
|
||||
uint64_t min_sstable_size = DEFAULT_MIN_SSTABLE_SIZE;
|
||||
double bucket_low = DEFAULT_BUCKET_LOW;
|
||||
double bucket_high = DEFAULT_BUCKET_HIGH;
|
||||
@@ -36,13 +35,48 @@ public:
|
||||
size_tiered_compaction_strategy_options(const std::map<sstring, sstring>& options);
|
||||
|
||||
size_tiered_compaction_strategy_options();
|
||||
size_tiered_compaction_strategy_options(const size_tiered_compaction_strategy_options&) = default;
|
||||
size_tiered_compaction_strategy_options(size_tiered_compaction_strategy_options&&) = default;
|
||||
size_tiered_compaction_strategy_options& operator=(const size_tiered_compaction_strategy_options&) = default;
|
||||
size_tiered_compaction_strategy_options& operator=(size_tiered_compaction_strategy_options&&) = default;
|
||||
|
||||
static void validate(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options);
|
||||
// FIXME: convert java code below.
|
||||
#if 0
|
||||
public static Map<String, String> validateOptions(Map<String, String> options, Map<String, String> uncheckedOptions) throws ConfigurationException
|
||||
{
|
||||
String optionValue = options.get(MIN_SSTABLE_SIZE_KEY);
|
||||
try
|
||||
{
|
||||
long minSSTableSize = optionValue == null ? DEFAULT_MIN_SSTABLE_SIZE : Long.parseLong(optionValue);
|
||||
if (minSSTableSize < 0)
|
||||
{
|
||||
throw new ConfigurationException(String.format("%s must be non negative: %d", MIN_SSTABLE_SIZE_KEY, minSSTableSize));
|
||||
}
|
||||
}
|
||||
catch (NumberFormatException e)
|
||||
{
|
||||
throw new ConfigurationException(String.format("%s is not a parsable int (base10) for %s", optionValue, MIN_SSTABLE_SIZE_KEY), e);
|
||||
}
|
||||
|
||||
double bucketLow = parseDouble(options, BUCKET_LOW_KEY, DEFAULT_BUCKET_LOW);
|
||||
double bucketHigh = parseDouble(options, BUCKET_HIGH_KEY, DEFAULT_BUCKET_HIGH);
|
||||
if (bucketHigh <= bucketLow)
|
||||
{
|
||||
throw new ConfigurationException(String.format("%s value (%s) is less than or equal to the %s value (%s)",
|
||||
BUCKET_HIGH_KEY, bucketHigh, BUCKET_LOW_KEY, bucketLow));
|
||||
}
|
||||
|
||||
double maxColdReadsRatio = parseDouble(options, COLD_READS_TO_OMIT_KEY, DEFAULT_COLD_READS_TO_OMIT);
|
||||
if (maxColdReadsRatio < 0.0 || maxColdReadsRatio > 1.0)
|
||||
{
|
||||
throw new ConfigurationException(String.format("%s value (%s) should be between between 0.0 and 1.0",
|
||||
COLD_READS_TO_OMIT_KEY, optionValue));
|
||||
}
|
||||
|
||||
uncheckedOptions.remove(MIN_SSTABLE_SIZE_KEY);
|
||||
uncheckedOptions.remove(BUCKET_LOW_KEY);
|
||||
uncheckedOptions.remove(BUCKET_HIGH_KEY);
|
||||
uncheckedOptions.remove(COLD_READS_TO_OMIT_KEY);
|
||||
|
||||
return uncheckedOptions;
|
||||
}
|
||||
#endif
|
||||
friend class size_tiered_compaction_strategy;
|
||||
};
|
||||
|
||||
@@ -75,9 +109,8 @@ public:
|
||||
|
||||
size_tiered_compaction_strategy(const std::map<sstring, sstring>& options);
|
||||
explicit size_tiered_compaction_strategy(const size_tiered_compaction_strategy_options& options);
|
||||
static void validate_options(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options);
|
||||
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control) override;
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) override;
|
||||
|
||||
virtual std::vector<compaction_descriptor> get_cleanup_compaction_jobs(table_state& table_s, std::vector<shared_sstable> candidates) const override;
|
||||
|
||||
@@ -96,7 +129,7 @@ public:
|
||||
|
||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
||||
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const override;
|
||||
|
||||
friend class ::size_tiered_backlog_tracker;
|
||||
};
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "compaction/compaction_fwd.hh"
|
||||
#include "sstables/sstable_set.hh"
|
||||
|
||||
namespace compaction {
|
||||
|
||||
@@ -19,8 +18,6 @@ class strategy_control {
|
||||
public:
|
||||
virtual ~strategy_control() {}
|
||||
virtual bool has_ongoing_compaction(table_state& table_s) const noexcept = 0;
|
||||
virtual std::vector<sstables::shared_sstable> candidates(table_state&) const = 0;
|
||||
virtual std::vector<sstables::frozen_sstable_run> candidates_as_runs(table_state&) const = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ public:
|
||||
virtual bool tombstone_gc_enabled() const noexcept = 0;
|
||||
virtual const tombstone_gc_state& get_tombstone_gc_state() const noexcept = 0;
|
||||
virtual compaction_backlog_tracker& get_backlog_tracker() = 0;
|
||||
virtual const std::string get_group_id() const noexcept = 0;
|
||||
virtual const std::string& get_group_id() const noexcept = 0;
|
||||
virtual seastar::condition_variable& get_staging_done_condition() noexcept = 0;
|
||||
};
|
||||
|
||||
|
||||
@@ -6,263 +6,29 @@
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
#include <boost/range/algorithm/min_element.hpp>
|
||||
|
||||
#include "compaction/task_manager_module.hh"
|
||||
#include "compaction/compaction_manager.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "sstables/sstable_directory.hh"
|
||||
#include "utils/pretty_printers.hh"
|
||||
|
||||
namespace replica {
|
||||
|
||||
// Helper structure for resharding.
|
||||
//
|
||||
// Describes the sstables (represented by their foreign_sstable_open_info) that are shared and
|
||||
// need to be resharded. Each shard will keep one such descriptor, that contains the list of
|
||||
// SSTables assigned to it, and their total size. The total size is used to make sure we are
|
||||
// fairly balancing SSTables among shards.
|
||||
struct reshard_shard_descriptor {
|
||||
sstables::sstable_directory::sstable_open_info_vector info_vec;
|
||||
uint64_t uncompressed_data_size = 0;
|
||||
|
||||
bool total_size_smaller(const reshard_shard_descriptor& rhs) const {
|
||||
return uncompressed_data_size < rhs.uncompressed_data_size;
|
||||
}
|
||||
|
||||
uint64_t size() const {
|
||||
return uncompressed_data_size;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace replica
|
||||
|
||||
// Collects shared SSTables from all shards and sstables that require cleanup and returns a vector containing them all.
|
||||
// This function assumes that the list of SSTables can be fairly big so it is careful to
|
||||
// manipulate it in a do_for_each loop (which yields) instead of using standard accumulators.
|
||||
future<sstables::sstable_directory::sstable_open_info_vector>
|
||||
collect_all_shared_sstables(sharded<sstables::sstable_directory>& dir, sharded<replica::database>& db, sstring ks_name, sstring table_name, compaction::owned_ranges_ptr owned_ranges_ptr) {
|
||||
auto info_vec = sstables::sstable_directory::sstable_open_info_vector();
|
||||
|
||||
// We want to make sure that each distributed object reshards about the same amount of data.
|
||||
// Each sharded object has its own shared SSTables. We can use a clever algorithm in which they
|
||||
// all distributely figure out which SSTables to exchange, but we'll keep it simple and move all
|
||||
// their foreign_sstable_open_info to a coordinator (the shard who called this function). We can
|
||||
// move in bulk and that's efficient. That shard can then distribute the work among all the
|
||||
// others who will reshard.
|
||||
auto coordinator = this_shard_id();
|
||||
// We will first move all of the foreign open info to temporary storage so that we can sort
|
||||
// them. We want to distribute bigger sstables first.
|
||||
const auto* sorted_owned_ranges_ptr = owned_ranges_ptr.get();
|
||||
co_await dir.invoke_on_all([&] (sstables::sstable_directory& d) -> future<> {
|
||||
auto shared_sstables = d.retrieve_shared_sstables();
|
||||
sstables::sstable_directory::sstable_open_info_vector need_cleanup;
|
||||
if (sorted_owned_ranges_ptr) {
|
||||
co_await d.filter_sstables([&] (sstables::shared_sstable sst) -> future<bool> {
|
||||
if (needs_cleanup(sst, *sorted_owned_ranges_ptr)) {
|
||||
need_cleanup.push_back(co_await sst->get_open_info());
|
||||
co_return false;
|
||||
}
|
||||
co_return true;
|
||||
});
|
||||
}
|
||||
if (shared_sstables.empty() && need_cleanup.empty()) {
|
||||
co_return;
|
||||
}
|
||||
co_await smp::submit_to(coordinator, [&] () -> future<> {
|
||||
info_vec.reserve(info_vec.size() + shared_sstables.size() + need_cleanup.size());
|
||||
for (auto& info : shared_sstables) {
|
||||
info_vec.emplace_back(std::move(info));
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
for (auto& info : need_cleanup) {
|
||||
info_vec.emplace_back(std::move(info));
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
co_return info_vec;
|
||||
}
|
||||
|
||||
// Given a vector of shared sstables to be resharded, distribute it among all shards.
|
||||
// The vector is first sorted to make sure that we are moving the biggest SSTables first.
|
||||
//
|
||||
// Returns a reshard_shard_descriptor per shard indicating the work that each shard has to do.
|
||||
future<std::vector<replica::reshard_shard_descriptor>>
|
||||
distribute_reshard_jobs(sstables::sstable_directory::sstable_open_info_vector source) {
|
||||
auto destinations = std::vector<replica::reshard_shard_descriptor>(smp::count);
|
||||
|
||||
std::sort(source.begin(), source.end(), [] (const sstables::foreign_sstable_open_info& a, const sstables::foreign_sstable_open_info& b) {
|
||||
// Sort on descending SSTable sizes.
|
||||
return a.uncompressed_data_size > b.uncompressed_data_size;
|
||||
});
|
||||
|
||||
for (auto& info : source) {
|
||||
// Choose the stable shard owner with the smallest amount of accumulated work.
|
||||
// Note that for sstables that need cleanup via resharding, owners may contain
|
||||
// a single shard.
|
||||
auto shard_it = boost::min_element(info.owners, [&] (const shard_id& lhs, const shard_id& rhs) {
|
||||
return destinations[lhs].total_size_smaller(destinations[rhs]);
|
||||
});
|
||||
auto& dest = destinations[*shard_it];
|
||||
dest.uncompressed_data_size += info.uncompressed_data_size;
|
||||
dest.info_vec.push_back(std::move(info));
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
|
||||
co_return destinations;
|
||||
}
|
||||
|
||||
// reshards a collection of SSTables.
|
||||
//
|
||||
// A reference to the compaction manager must be passed so we can register with it. Knowing
|
||||
// which table is being processed is a requirement of the compaction manager, so this must be
|
||||
// passed too.
|
||||
//
|
||||
// We will reshard max_sstables_per_job at once.
|
||||
//
|
||||
// A creator function must be passed that will create an SSTable object in the correct shard,
|
||||
// and an I/O priority must be specified.
|
||||
future<> reshard(sstables::sstable_directory& dir, sstables::sstable_directory::sstable_open_info_vector shared_info, replica::table& table,
|
||||
sstables::compaction_sstable_creator_fn creator, compaction::owned_ranges_ptr owned_ranges_ptr, std::optional<tasks::task_info> parent_info)
|
||||
{
|
||||
// Resharding doesn't like empty sstable sets, so bail early. There is nothing
|
||||
// to reshard in this shard.
|
||||
if (shared_info.empty()) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
// We want to reshard many SSTables at a time for efficiency. However if we have too many we may
|
||||
// be risking OOM.
|
||||
auto max_sstables_per_job = table.schema()->max_compaction_threshold();
|
||||
auto num_jobs = (shared_info.size() + max_sstables_per_job - 1) / max_sstables_per_job;
|
||||
auto sstables_per_job = shared_info.size() / num_jobs;
|
||||
|
||||
std::vector<std::vector<sstables::shared_sstable>> buckets;
|
||||
buckets.reserve(num_jobs);
|
||||
buckets.emplace_back();
|
||||
co_await coroutine::parallel_for_each(shared_info, [&] (sstables::foreign_sstable_open_info& info) -> future<> {
|
||||
auto sst = co_await dir.load_foreign_sstable(info);
|
||||
// Last bucket gets leftover SSTables
|
||||
if ((buckets.back().size() >= sstables_per_job) && (buckets.size() < num_jobs)) {
|
||||
buckets.emplace_back();
|
||||
}
|
||||
buckets.back().push_back(std::move(sst));
|
||||
});
|
||||
// There is a semaphore inside the compaction manager in run_resharding_jobs. So we
|
||||
// parallel_for_each so the statistics about pending jobs are updated to reflect all
|
||||
// jobs. But only one will run in parallel at a time
|
||||
auto& t = table.as_table_state();
|
||||
co_await coroutine::parallel_for_each(buckets, [&] (std::vector<sstables::shared_sstable>& sstlist) mutable {
|
||||
return table.get_compaction_manager().run_custom_job(table.as_table_state(), sstables::compaction_type::Reshard, "Reshard compaction", [&] (sstables::compaction_data& info, sstables::compaction_progress_monitor& progress_monitor) -> future<> {
|
||||
auto erm = table.get_effective_replication_map(); // keep alive around compaction.
|
||||
|
||||
sstables::compaction_descriptor desc(sstlist);
|
||||
desc.options = sstables::compaction_type_options::make_reshard();
|
||||
desc.creator = creator;
|
||||
desc.sharder = &erm->get_sharder(*table.schema());
|
||||
desc.owned_ranges = owned_ranges_ptr;
|
||||
|
||||
auto result = co_await sstables::compact_sstables(std::move(desc), info, t, progress_monitor);
|
||||
// input sstables are moved, to guarantee their resources are released once we're done
|
||||
// resharding them.
|
||||
co_await when_all_succeed(dir.collect_output_unshared_sstables(std::move(result.new_sstables), sstables::sstable_directory::can_be_remote::yes), dir.remove_sstables(std::move(sstlist))).discard_result();
|
||||
}, parent_info, throw_if_stopping::no);
|
||||
});
|
||||
}
|
||||
|
||||
namespace compaction {
|
||||
|
||||
struct table_tasks_info {
|
||||
tasks::task_manager::task_ptr task;
|
||||
table_info ti;
|
||||
|
||||
table_tasks_info(tasks::task_manager::task_ptr t, table_info info)
|
||||
: task(t)
|
||||
, ti(info)
|
||||
{}
|
||||
};
|
||||
|
||||
future<> run_on_table(sstring op, replica::database& db, std::string keyspace, table_info ti, std::function<future<> (replica::table&)> func) {
|
||||
std::exception_ptr ex;
|
||||
tasks::tmlogger.debug("Starting {} on {}.{}", op, keyspace, ti.name);
|
||||
try {
|
||||
co_await func(db.find_column_family(ti.id));
|
||||
} catch (const replica::no_such_column_family& e) {
|
||||
tasks::tmlogger.warn("Skipping {} of {}.{}: {}", op, keyspace, ti.name, e.what());
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
tasks::tmlogger.error("Failed {} of {}.{}: {}", op, keyspace, ti.name, ex);
|
||||
}
|
||||
if (ex) {
|
||||
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||
}
|
||||
}
|
||||
|
||||
// Run on all tables, skipping dropped tables
|
||||
future<> run_on_existing_tables(sstring op, replica::database& db, std::string keyspace, const std::vector<table_info> local_tables, std::function<future<> (replica::table&)> func) {
|
||||
for (const auto& ti : local_tables) {
|
||||
co_await run_on_table(op, db, keyspace, ti, func);
|
||||
}
|
||||
}
|
||||
|
||||
future<> wait_for_your_turn(seastar::condition_variable& cv, tasks::task_manager::task_ptr& current_task, tasks::task_id id) {
|
||||
co_await cv.wait([&] {
|
||||
return current_task && current_task->id() == id;
|
||||
});
|
||||
}
|
||||
|
||||
future<> run_table_tasks(replica::database& db, std::vector<table_tasks_info> table_tasks, seastar::condition_variable& cv, tasks::task_manager::task_ptr& current_task, bool sort) {
|
||||
future<> run_on_existing_tables(sstring op, replica::database& db, std::string_view keyspace, const std::vector<table_id> local_tables, std::function<future<> (replica::table&)> func) {
|
||||
std::exception_ptr ex;
|
||||
|
||||
// While compaction is run on one table, the size of tables may significantly change.
|
||||
// Thus, they are sorted before each invidual compaction and the smallest table is chosen.
|
||||
while (!table_tasks.empty()) {
|
||||
for (const auto& ti : local_tables) {
|
||||
tasks::tmlogger.debug("Starting {} on {}.{}", op, keyspace, ti);
|
||||
try {
|
||||
if (sort) {
|
||||
// Major compact smaller tables first, to increase chances of success if low on space.
|
||||
// Tables will be kept in descending order.
|
||||
std::ranges::sort(table_tasks, std::greater<>(), [&] (const table_tasks_info& tti) {
|
||||
try {
|
||||
return db.find_column_family(tti.ti.id).get_stats().live_disk_space_used;
|
||||
} catch (const replica::no_such_column_family& e) {
|
||||
return int64_t(-1);
|
||||
}
|
||||
});
|
||||
}
|
||||
// Task responsible for the smallest table.
|
||||
current_task = table_tasks.back().task;
|
||||
table_tasks.pop_back();
|
||||
cv.broadcast();
|
||||
co_await current_task->done();
|
||||
co_await func(db.find_column_family(ti));
|
||||
} catch (const replica::no_such_column_family& e) {
|
||||
tasks::tmlogger.warn("Skipping {} of {}.{}: {}", op, keyspace, ti, e.what());
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
current_task = nullptr;
|
||||
cv.broken(ex);
|
||||
break;
|
||||
tasks::tmlogger.error("Failed {} of {}.{}: {}", op, keyspace, ti, ex);
|
||||
}
|
||||
if (ex) {
|
||||
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||
}
|
||||
}
|
||||
|
||||
if (ex) {
|
||||
// Wait for all tasks even on failure.
|
||||
for (auto& tti: table_tasks) {
|
||||
co_await tti.task->done();
|
||||
}
|
||||
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||
}
|
||||
}
|
||||
|
||||
future<tasks::task_manager::task::progress> compaction_task_impl::get_progress(const sstables::compaction_data& cdata, const sstables::compaction_progress_monitor& progress_monitor) const {
|
||||
if (cdata.compaction_size == 0) {
|
||||
co_return get_binary_progress();
|
||||
}
|
||||
|
||||
co_return tasks::task_manager::task::progress{
|
||||
.completed = is_done() ? cdata.compaction_size : progress_monitor.get_progress(), // Consider tasks which skip all files.
|
||||
.total = cdata.compaction_size
|
||||
};
|
||||
}
|
||||
|
||||
future<> major_keyspace_compaction_task_impl::run() {
|
||||
@@ -274,51 +40,48 @@ future<> major_keyspace_compaction_task_impl::run() {
|
||||
});
|
||||
}
|
||||
|
||||
future<> shard_major_keyspace_compaction_task_impl::run() {
|
||||
seastar::condition_variable cv;
|
||||
tasks::task_manager::task_ptr current_task;
|
||||
tasks::task_info parent_info{_status.id, _status.shard};
|
||||
std::vector<table_tasks_info> table_tasks;
|
||||
for (auto& ti : _local_tables) {
|
||||
table_tasks.emplace_back(co_await _module->make_and_start_task<table_major_keyspace_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task), ti);
|
||||
}
|
||||
|
||||
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, true);
|
||||
tasks::is_internal shard_major_keyspace_compaction_task_impl::is_internal() const noexcept {
|
||||
return tasks::is_internal::yes;
|
||||
}
|
||||
|
||||
future<> table_major_keyspace_compaction_task_impl::run() {
|
||||
co_await wait_for_your_turn(_cv, _current_task, _status.id);
|
||||
tasks::task_info info{_status.id, _status.shard};
|
||||
co_await run_on_table("force_keyspace_compaction", _db, _status.keyspace, _ti, [info] (replica::table& t) {
|
||||
return t.compact_all_sstables(info);
|
||||
future<> shard_major_keyspace_compaction_task_impl::run() {
|
||||
// Major compact smaller tables first, to increase chances of success if low on space.
|
||||
std::ranges::sort(_local_tables, std::less<>(), [&] (const table_id& ti) {
|
||||
try {
|
||||
return _db.find_column_family(ti).get_stats().live_disk_space_used;
|
||||
} catch (const replica::no_such_column_family& e) {
|
||||
return int64_t(-1);
|
||||
}
|
||||
});
|
||||
co_await run_on_existing_tables("force_keyspace_compaction", _db, _status.keyspace, _local_tables, [] (replica::table& t) {
|
||||
return t.compact_all_sstables();
|
||||
});
|
||||
}
|
||||
|
||||
future<> cleanup_keyspace_compaction_task_impl::run() {
|
||||
co_await _db.invoke_on_all([&] (replica::database& db) -> future<> {
|
||||
auto& module = db.get_compaction_manager().get_task_manager_module();
|
||||
auto task = co_await module.make_and_start_task<shard_cleanup_keyspace_compaction_task_impl>({_status.id, _status.shard}, _status.keyspace, _status.id, db, _table_infos);
|
||||
auto task = co_await module.make_and_start_task<shard_cleanup_keyspace_compaction_task_impl>({_status.id, _status.shard}, _status.keyspace, _status.id, db, _table_ids);
|
||||
co_await task->done();
|
||||
});
|
||||
}
|
||||
|
||||
future<> shard_cleanup_keyspace_compaction_task_impl::run() {
|
||||
seastar::condition_variable cv;
|
||||
tasks::task_manager::task_ptr current_task;
|
||||
tasks::task_info parent_info{_status.id, _status.shard};
|
||||
std::vector<table_tasks_info> table_tasks;
|
||||
for (auto& ti : _local_tables) {
|
||||
table_tasks.emplace_back(co_await _module->make_and_start_task<table_cleanup_keyspace_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task), ti);
|
||||
}
|
||||
|
||||
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, true);
|
||||
tasks::is_internal shard_cleanup_keyspace_compaction_task_impl::is_internal() const noexcept {
|
||||
return tasks::is_internal::yes;
|
||||
}
|
||||
|
||||
future<> table_cleanup_keyspace_compaction_task_impl::run() {
|
||||
co_await wait_for_your_turn(_cv, _current_task, _status.id);
|
||||
future<> shard_cleanup_keyspace_compaction_task_impl::run() {
|
||||
// Cleanup smaller tables first, to increase chances of success if low on space.
|
||||
std::ranges::sort(_local_tables, std::less<>(), [&] (const table_id& ti) {
|
||||
try {
|
||||
return _db.find_column_family(ti).get_stats().live_disk_space_used;
|
||||
} catch (const replica::no_such_column_family& e) {
|
||||
return int64_t(-1);
|
||||
}
|
||||
});
|
||||
auto owned_ranges_ptr = compaction::make_owned_ranges_ptr(_db.get_keyspace_local_ranges(_status.keyspace));
|
||||
co_await run_on_table("force_keyspace_cleanup", _db, _status.keyspace, _ti, [&] (replica::table& t) {
|
||||
return t.perform_cleanup_compaction(owned_ranges_ptr, tasks::task_info{_status.id, _status.shard});
|
||||
co_await run_on_existing_tables("force_keyspace_cleanup", _db, _status.keyspace, _local_tables, [&] (replica::table& t) {
|
||||
return t.perform_cleanup_compaction(owned_ranges_ptr);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -333,23 +96,13 @@ future<> offstrategy_keyspace_compaction_task_impl::run() {
|
||||
}, false, std::plus<bool>());
|
||||
}
|
||||
|
||||
future<> shard_offstrategy_keyspace_compaction_task_impl::run() {
|
||||
seastar::condition_variable cv;
|
||||
tasks::task_manager::task_ptr current_task;
|
||||
tasks::task_info parent_info{_status.id, _status.shard};
|
||||
std::vector<table_tasks_info> table_tasks;
|
||||
for (auto& ti : _table_infos) {
|
||||
table_tasks.emplace_back(co_await _module->make_and_start_task<table_offstrategy_keyspace_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task, _needed), ti);
|
||||
}
|
||||
|
||||
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, false);
|
||||
tasks::is_internal shard_offstrategy_keyspace_compaction_task_impl::is_internal() const noexcept {
|
||||
return tasks::is_internal::yes;
|
||||
}
|
||||
|
||||
future<> table_offstrategy_keyspace_compaction_task_impl::run() {
|
||||
co_await wait_for_your_turn(_cv, _current_task, _status.id);
|
||||
tasks::task_info info{_status.id, _status.shard};
|
||||
co_await run_on_table("perform_keyspace_offstrategy_compaction", _db, _status.keyspace, _ti, [this, info] (replica::table& t) -> future<> {
|
||||
_needed |= co_await t.perform_offstrategy_compaction(info);
|
||||
future<> shard_offstrategy_keyspace_compaction_task_impl::run() {
|
||||
co_await run_on_existing_tables("perform_keyspace_offstrategy_compaction", _db, _status.keyspace, _table_infos, [this] (replica::table& t) -> future<> {
|
||||
_needed |= co_await t.perform_offstrategy_compaction();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -362,25 +115,15 @@ future<> upgrade_sstables_compaction_task_impl::run() {
|
||||
});
|
||||
}
|
||||
|
||||
future<> shard_upgrade_sstables_compaction_task_impl::run() {
|
||||
seastar::condition_variable cv;
|
||||
tasks::task_manager::task_ptr current_task;
|
||||
tasks::task_info parent_info{_status.id, _status.shard};
|
||||
std::vector<table_tasks_info> table_tasks;
|
||||
for (auto& ti : _table_infos) {
|
||||
table_tasks.emplace_back(co_await _module->make_and_start_task<table_upgrade_sstables_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task, _exclude_current_version), ti);
|
||||
}
|
||||
|
||||
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, false);
|
||||
tasks::is_internal shard_upgrade_sstables_compaction_task_impl::is_internal() const noexcept {
|
||||
return tasks::is_internal::yes;
|
||||
}
|
||||
|
||||
future<> table_upgrade_sstables_compaction_task_impl::run() {
|
||||
co_await wait_for_your_turn(_cv, _current_task, _status.id);
|
||||
future<> shard_upgrade_sstables_compaction_task_impl::run() {
|
||||
auto owned_ranges_ptr = compaction::make_owned_ranges_ptr(_db.get_keyspace_local_ranges(_status.keyspace));
|
||||
tasks::task_info info{_status.id, _status.shard};
|
||||
co_await run_on_table("upgrade_sstables", _db, _status.keyspace, _ti, [&] (replica::table& t) -> future<> {
|
||||
co_await run_on_existing_tables("upgrade_sstables", _db, _status.keyspace, _table_infos, [&] (replica::table& t) -> future<> {
|
||||
return t.parallel_foreach_table_state([&] (compaction::table_state& ts) -> future<> {
|
||||
return t.get_compaction_manager().perform_sstable_upgrade(owned_ranges_ptr, ts, _exclude_current_version, info);
|
||||
return t.get_compaction_manager().perform_sstable_upgrade(owned_ranges_ptr, ts, _exclude_current_version);
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -396,6 +139,10 @@ future<> scrub_sstables_compaction_task_impl::run() {
|
||||
}, sstables::compaction_stats{}, std::plus<sstables::compaction_stats>());
|
||||
}
|
||||
|
||||
tasks::is_internal shard_scrub_sstables_compaction_task_impl::is_internal() const noexcept {
|
||||
return tasks::is_internal::yes;
|
||||
}
|
||||
|
||||
future<> shard_scrub_sstables_compaction_task_impl::run() {
|
||||
_stats = co_await map_reduce(_column_families, [&] (sstring cfname) -> future<sstables::compaction_stats> {
|
||||
sstables::compaction_stats stats{};
|
||||
@@ -407,123 +154,18 @@ future<> shard_scrub_sstables_compaction_task_impl::run() {
|
||||
}, sstables::compaction_stats{}, std::plus<sstables::compaction_stats>());
|
||||
}
|
||||
|
||||
tasks::is_internal table_scrub_sstables_compaction_task_impl::is_internal() const noexcept {
|
||||
return tasks::is_internal::yes;
|
||||
}
|
||||
|
||||
future<> table_scrub_sstables_compaction_task_impl::run() {
|
||||
auto& cm = _db.get_compaction_manager();
|
||||
auto& cf = _db.find_column_family(_status.keyspace, _status.table);
|
||||
tasks::task_info info{_status.id, _status.shard};
|
||||
co_await cf.parallel_foreach_table_state([&] (compaction::table_state& ts) mutable -> future<> {
|
||||
auto r = co_await cm.perform_sstable_scrub(ts, _opts, info);
|
||||
auto r = co_await cm.perform_sstable_scrub(ts, _opts);
|
||||
_stats += r.value_or(sstables::compaction_stats{});
|
||||
});
|
||||
}
|
||||
|
||||
future<> table_reshaping_compaction_task_impl::run() {
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
auto total_size = co_await _dir.map_reduce0([&] (sstables::sstable_directory& d) -> future<uint64_t> {
|
||||
uint64_t total_shard_size;
|
||||
tasks::task_info parent_info{_status.id, _status.shard};
|
||||
auto& compaction_module = _db.local().get_compaction_manager().get_task_manager_module();
|
||||
auto task = co_await compaction_module.make_and_start_task<shard_reshaping_compaction_task_impl>(parent_info, _status.keyspace, _status.table, _status.id, d, _db, _mode, _creator, _filter, total_shard_size);
|
||||
co_await task->done();
|
||||
co_return total_shard_size;
|
||||
}, uint64_t(0), std::plus<uint64_t>());
|
||||
|
||||
if (total_size > 0) {
|
||||
auto duration = std::chrono::duration_cast<std::chrono::duration<float>>(std::chrono::steady_clock::now() - start);
|
||||
dblog.info("Reshaped {} in {:.2f} seconds, {}", utils::pretty_printed_data_size(total_size), duration.count(), utils::pretty_printed_throughput(total_size, duration));
|
||||
}
|
||||
}
|
||||
|
||||
future<> shard_reshaping_compaction_task_impl::run() {
|
||||
auto& table = _db.local().find_column_family(_status.keyspace, _status.table);
|
||||
uint64_t reshaped_size = 0;
|
||||
tasks::task_info info{_status.id, _status.shard};
|
||||
|
||||
while (true) {
|
||||
auto reshape_candidates = boost::copy_range<std::vector<sstables::shared_sstable>>(_dir.get_unshared_local_sstables()
|
||||
| boost::adaptors::filtered([&filter = _filter] (const auto& sst) {
|
||||
return filter(sst);
|
||||
}));
|
||||
auto desc = table.get_compaction_strategy().get_reshaping_job(std::move(reshape_candidates), table.schema(), _mode);
|
||||
if (desc.sstables.empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!reshaped_size) {
|
||||
dblog.info("Table {}.{} with compaction strategy {} found SSTables that need reshape. Starting reshape process", table.schema()->ks_name(), table.schema()->cf_name(), table.get_compaction_strategy().name());
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> sstlist;
|
||||
for (auto& sst : desc.sstables) {
|
||||
reshaped_size += sst->data_size();
|
||||
sstlist.push_back(sst);
|
||||
}
|
||||
|
||||
desc.creator = _creator;
|
||||
|
||||
std::exception_ptr ex;
|
||||
try {
|
||||
co_await table.get_compaction_manager().run_custom_job(table.as_table_state(), sstables::compaction_type::Reshape, "Reshape compaction", [&dir = _dir, &table, sstlist = std::move(sstlist), desc = std::move(desc)] (sstables::compaction_data& info, sstables::compaction_progress_monitor& progress_monitor) mutable -> future<> {
|
||||
sstables::compaction_result result = co_await sstables::compact_sstables(std::move(desc), info, table.as_table_state(), progress_monitor);
|
||||
co_await dir.remove_unshared_sstables(std::move(sstlist));
|
||||
co_await dir.collect_output_unshared_sstables(std::move(result.new_sstables), sstables::sstable_directory::can_be_remote::no);
|
||||
}, info, throw_if_stopping::yes);
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
}
|
||||
|
||||
if (ex != nullptr) {
|
||||
try {
|
||||
std::rethrow_exception(std::move(ex));
|
||||
} catch (sstables::compaction_stopped_exception& e) {
|
||||
dblog.info("Table {}.{} with compaction strategy {} had reshape successfully aborted.", table.schema()->ks_name(), table.schema()->cf_name(), table.get_compaction_strategy().name());
|
||||
break;
|
||||
} catch (...) {
|
||||
dblog.info("Reshape failed for Table {}.{} with compaction strategy {} due to {}", table.schema()->ks_name(), table.schema()->cf_name(), table.get_compaction_strategy().name(), std::current_exception());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
|
||||
_total_shard_size = reshaped_size;
|
||||
}
|
||||
|
||||
future<> table_resharding_compaction_task_impl::run() {
|
||||
auto all_jobs = co_await collect_all_shared_sstables(_dir, _db, _status.keyspace, _status.table, _owned_ranges_ptr);
|
||||
auto destinations = co_await distribute_reshard_jobs(std::move(all_jobs));
|
||||
|
||||
uint64_t total_size = boost::accumulate(destinations | boost::adaptors::transformed(std::mem_fn(&replica::reshard_shard_descriptor::size)), uint64_t(0));
|
||||
if (total_size == 0) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
dblog.info("Resharding {} for {}.{}", utils::pretty_printed_data_size(total_size), _status.keyspace, _status.table);
|
||||
|
||||
co_await _db.invoke_on_all(coroutine::lambda([&] (replica::database& db) -> future<> {
|
||||
tasks::task_info parent_info{_status.id, _status.shard};
|
||||
auto& compaction_module = _db.local().get_compaction_manager().get_task_manager_module();
|
||||
// make shard-local copy of owned_ranges
|
||||
compaction::owned_ranges_ptr local_owned_ranges_ptr;
|
||||
if (_owned_ranges_ptr) {
|
||||
local_owned_ranges_ptr = make_lw_shared<const dht::token_range_vector>(*_owned_ranges_ptr);
|
||||
}
|
||||
auto task = co_await compaction_module.make_and_start_task<shard_resharding_compaction_task_impl>(parent_info, _status.keyspace, _status.table, _status.id, _dir, db, _creator, std::move(local_owned_ranges_ptr), destinations);
|
||||
co_await task->done();
|
||||
}));
|
||||
|
||||
auto duration = std::chrono::duration_cast<std::chrono::duration<float>>(std::chrono::steady_clock::now() - start);
|
||||
dblog.info("Resharded {} for {}.{} in {:.2f} seconds, {}", utils::pretty_printed_data_size(total_size), _status.keyspace, _status.table, duration.count(), utils::pretty_printed_throughput(total_size, duration));
|
||||
}
|
||||
|
||||
future<> shard_resharding_compaction_task_impl::run() {
|
||||
auto& table = _db.find_column_family(_status.keyspace, _status.table);
|
||||
auto info_vec = std::move(_destinations[this_shard_id()].info_vec);
|
||||
tasks::task_info info{_status.id, _status.shard};
|
||||
co_await reshard(_dir.local(), std::move(info_vec), table, _creator, std::move(_local_owned_ranges_ptr), info);
|
||||
co_await _dir.local().move_foreign_sstables(_dir);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -13,14 +13,6 @@
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "tasks/task_manager.hh"
|
||||
|
||||
namespace sstables {
|
||||
class sstable_directory;
|
||||
}
|
||||
|
||||
namespace replica {
|
||||
class reshard_shard_descriptor;
|
||||
}
|
||||
|
||||
namespace compaction {
|
||||
|
||||
class compaction_task_impl : public tasks::task_manager::task::impl {
|
||||
@@ -28,12 +20,11 @@ public:
|
||||
compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
tasks::task_id id,
|
||||
unsigned sequence_number,
|
||||
std::string scope,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
std::string entity,
|
||||
tasks::task_id parent_id) noexcept
|
||||
: tasks::task_manager::task::impl(module, id, sequence_number, std::move(scope), std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
: tasks::task_manager::task::impl(module, id, sequence_number, std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
{
|
||||
// FIXME: add progress units
|
||||
}
|
||||
@@ -41,8 +32,6 @@ public:
|
||||
virtual std::string type() const override = 0;
|
||||
protected:
|
||||
virtual future<> run() override = 0;
|
||||
|
||||
future<tasks::task_manager::task::progress> get_progress(const sstables::compaction_data& cdata, const sstables::compaction_progress_monitor& progress_monitor) const;
|
||||
};
|
||||
|
||||
class major_compaction_task_impl : public compaction_task_impl {
|
||||
@@ -50,12 +39,11 @@ public:
|
||||
major_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
tasks::task_id id,
|
||||
unsigned sequence_number,
|
||||
std::string scope,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
std::string entity,
|
||||
tasks::task_id parent_id) noexcept
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(scope), std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
{
|
||||
// FIXME: add progress units
|
||||
}
|
||||
@@ -70,13 +58,13 @@ protected:
|
||||
class major_keyspace_compaction_task_impl : public major_compaction_task_impl {
|
||||
private:
|
||||
sharded<replica::database>& _db;
|
||||
std::vector<table_info> _table_infos;
|
||||
std::vector<table_id> _table_infos;
|
||||
public:
|
||||
major_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
sharded<replica::database>& db,
|
||||
std::vector<table_info> table_infos) noexcept
|
||||
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "keyspace", std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
std::vector<table_id> table_infos) noexcept
|
||||
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
, _db(db)
|
||||
, _table_infos(std::move(table_infos))
|
||||
{}
|
||||
@@ -87,58 +75,33 @@ protected:
|
||||
class shard_major_keyspace_compaction_task_impl : public major_compaction_task_impl {
|
||||
private:
|
||||
replica::database& _db;
|
||||
std::vector<table_info> _local_tables;
|
||||
std::vector<table_id> _local_tables;
|
||||
public:
|
||||
shard_major_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
tasks::task_id parent_id,
|
||||
replica::database& db,
|
||||
std::vector<table_info> local_tables) noexcept
|
||||
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "shard", std::move(keyspace), "", "", parent_id)
|
||||
std::vector<table_id> local_tables) noexcept
|
||||
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", parent_id)
|
||||
, _db(db)
|
||||
, _local_tables(std::move(local_tables))
|
||||
{}
|
||||
|
||||
virtual tasks::is_internal is_internal() const noexcept override;
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
|
||||
class table_major_keyspace_compaction_task_impl : public major_compaction_task_impl {
|
||||
private:
|
||||
replica::database& _db;
|
||||
table_info _ti;
|
||||
seastar::condition_variable& _cv;
|
||||
tasks::task_manager::task_ptr& _current_task;
|
||||
public:
|
||||
table_major_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
tasks::task_id parent_id,
|
||||
replica::database& db,
|
||||
table_info ti,
|
||||
seastar::condition_variable& cv,
|
||||
tasks::task_manager::task_ptr& current_task) noexcept
|
||||
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "table", std::move(keyspace), std::move(table), "", parent_id)
|
||||
, _db(db)
|
||||
, _ti(std::move(ti))
|
||||
, _cv(cv)
|
||||
, _current_task(current_task)
|
||||
{}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
|
||||
|
||||
class cleanup_compaction_task_impl : public compaction_task_impl {
|
||||
public:
|
||||
cleanup_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
tasks::task_id id,
|
||||
unsigned sequence_number,
|
||||
std::string scope,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
std::string entity,
|
||||
tasks::task_id parent_id) noexcept
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(scope), std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
{
|
||||
// FIXME: add progress units
|
||||
}
|
||||
@@ -153,15 +116,15 @@ protected:
|
||||
class cleanup_keyspace_compaction_task_impl : public cleanup_compaction_task_impl {
|
||||
private:
|
||||
sharded<replica::database>& _db;
|
||||
std::vector<table_info> _table_infos;
|
||||
std::vector<table_id> _table_ids;
|
||||
public:
|
||||
cleanup_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
sharded<replica::database>& db,
|
||||
std::vector<table_info> table_infos) noexcept
|
||||
: cleanup_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "keyspace", std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
std::vector<table_id> table_ids) noexcept
|
||||
: cleanup_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
, _db(db)
|
||||
, _table_infos(std::move(table_infos))
|
||||
, _table_ids(std::move(table_ids))
|
||||
{}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
@@ -170,42 +133,19 @@ protected:
|
||||
class shard_cleanup_keyspace_compaction_task_impl : public cleanup_compaction_task_impl {
|
||||
private:
|
||||
replica::database& _db;
|
||||
std::vector<table_info> _local_tables;
|
||||
std::vector<table_id> _local_tables;
|
||||
public:
|
||||
shard_cleanup_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
tasks::task_id parent_id,
|
||||
replica::database& db,
|
||||
std::vector<table_info> local_tables) noexcept
|
||||
: cleanup_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "shard", std::move(keyspace), "", "", parent_id)
|
||||
std::vector<table_id> local_tables) noexcept
|
||||
: cleanup_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", parent_id)
|
||||
, _db(db)
|
||||
, _local_tables(std::move(local_tables))
|
||||
{}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
|
||||
class table_cleanup_keyspace_compaction_task_impl : public cleanup_compaction_task_impl {
|
||||
private:
|
||||
replica::database& _db;
|
||||
table_info _ti;
|
||||
seastar::condition_variable& _cv;
|
||||
tasks::task_manager::task_ptr& _current_task;
|
||||
public:
|
||||
table_cleanup_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
tasks::task_id parent_id,
|
||||
replica::database& db,
|
||||
table_info ti,
|
||||
seastar::condition_variable& cv,
|
||||
tasks::task_manager::task_ptr& current_task) noexcept
|
||||
: cleanup_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "table", std::move(keyspace), std::move(table), "", parent_id)
|
||||
, _db(db)
|
||||
, _ti(std::move(ti))
|
||||
, _cv(cv)
|
||||
, _current_task(current_task)
|
||||
{}
|
||||
virtual tasks::is_internal is_internal() const noexcept override;
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
@@ -215,12 +155,11 @@ public:
|
||||
offstrategy_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
tasks::task_id id,
|
||||
unsigned sequence_number,
|
||||
std::string scope,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
std::string entity,
|
||||
tasks::task_id parent_id) noexcept
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(scope), std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
{
|
||||
// FIXME: add progress units
|
||||
}
|
||||
@@ -235,15 +174,15 @@ protected:
|
||||
class offstrategy_keyspace_compaction_task_impl : public offstrategy_compaction_task_impl {
|
||||
private:
|
||||
sharded<replica::database>& _db;
|
||||
std::vector<table_info> _table_infos;
|
||||
std::vector<table_id> _table_infos;
|
||||
bool& _needed;
|
||||
public:
|
||||
offstrategy_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
sharded<replica::database>& db,
|
||||
std::vector<table_info> table_infos,
|
||||
std::vector<table_id> table_infos,
|
||||
bool& needed) noexcept
|
||||
: offstrategy_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "keyspace", std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
: offstrategy_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
, _db(db)
|
||||
, _table_infos(std::move(table_infos))
|
||||
, _needed(needed)
|
||||
@@ -255,48 +194,22 @@ protected:
|
||||
class shard_offstrategy_keyspace_compaction_task_impl : public offstrategy_compaction_task_impl {
|
||||
private:
|
||||
replica::database& _db;
|
||||
std::vector<table_info> _table_infos;
|
||||
std::vector<table_id> _table_infos;
|
||||
bool& _needed;
|
||||
public:
|
||||
shard_offstrategy_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
tasks::task_id parent_id,
|
||||
replica::database& db,
|
||||
std::vector<table_info> table_infos,
|
||||
std::vector<table_id> table_infos,
|
||||
bool& needed) noexcept
|
||||
: offstrategy_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "shard", std::move(keyspace), "", "", parent_id)
|
||||
: offstrategy_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", parent_id)
|
||||
, _db(db)
|
||||
, _table_infos(std::move(table_infos))
|
||||
, _needed(needed)
|
||||
{}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
|
||||
class table_offstrategy_keyspace_compaction_task_impl : public offstrategy_compaction_task_impl {
|
||||
private:
|
||||
replica::database& _db;
|
||||
table_info _ti;
|
||||
seastar::condition_variable& _cv;
|
||||
tasks::task_manager::task_ptr& _current_task;
|
||||
bool& _needed;
|
||||
public:
|
||||
table_offstrategy_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
tasks::task_id parent_id,
|
||||
replica::database& db,
|
||||
table_info ti,
|
||||
seastar::condition_variable& cv,
|
||||
tasks::task_manager::task_ptr& current_task,
|
||||
bool& needed) noexcept
|
||||
: offstrategy_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "table", std::move(keyspace), std::move(table), "", parent_id)
|
||||
, _db(db)
|
||||
, _ti(std::move(ti))
|
||||
, _cv(cv)
|
||||
, _current_task(current_task)
|
||||
, _needed(needed)
|
||||
{}
|
||||
virtual tasks::is_internal is_internal() const noexcept override;
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
@@ -306,18 +219,17 @@ public:
|
||||
sstables_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
tasks::task_id id,
|
||||
unsigned sequence_number,
|
||||
std::string scope,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
std::string entity,
|
||||
tasks::task_id parent_id) noexcept
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(scope), std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
{
|
||||
// FIXME: add progress units
|
||||
}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "sstables compaction";
|
||||
return "rewrite sstables compaction";
|
||||
}
|
||||
protected:
|
||||
virtual future<> run() override = 0;
|
||||
@@ -326,23 +238,19 @@ protected:
|
||||
class upgrade_sstables_compaction_task_impl : public sstables_compaction_task_impl {
|
||||
private:
|
||||
sharded<replica::database>& _db;
|
||||
std::vector<table_info> _table_infos;
|
||||
std::vector<table_id> _table_infos;
|
||||
bool _exclude_current_version;
|
||||
public:
|
||||
upgrade_sstables_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
sharded<replica::database>& db,
|
||||
std::vector<table_info> table_infos,
|
||||
std::vector<table_id> table_infos,
|
||||
bool exclude_current_version) noexcept
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "keyspace", std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
, _db(db)
|
||||
, _table_infos(std::move(table_infos))
|
||||
, _exclude_current_version(exclude_current_version)
|
||||
{}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "upgrade " + sstables_compaction_task_impl::type();
|
||||
}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
@@ -350,56 +258,22 @@ protected:
|
||||
class shard_upgrade_sstables_compaction_task_impl : public sstables_compaction_task_impl {
|
||||
private:
|
||||
replica::database& _db;
|
||||
std::vector<table_info> _table_infos;
|
||||
std::vector<table_id> _table_infos;
|
||||
bool _exclude_current_version;
|
||||
public:
|
||||
shard_upgrade_sstables_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
tasks::task_id parent_id,
|
||||
replica::database& db,
|
||||
std::vector<table_info> table_infos,
|
||||
std::vector<table_id> table_infos,
|
||||
bool exclude_current_version) noexcept
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "shard", std::move(keyspace), "", "", parent_id)
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", parent_id)
|
||||
, _db(db)
|
||||
, _table_infos(std::move(table_infos))
|
||||
, _exclude_current_version(exclude_current_version)
|
||||
{}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "upgrade " + sstables_compaction_task_impl::type();
|
||||
}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
|
||||
class table_upgrade_sstables_compaction_task_impl : public sstables_compaction_task_impl {
|
||||
private:
|
||||
replica::database& _db;
|
||||
table_info _ti;
|
||||
seastar::condition_variable& _cv;
|
||||
tasks::task_manager::task_ptr& _current_task;
|
||||
bool _exclude_current_version;
|
||||
public:
|
||||
table_upgrade_sstables_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
tasks::task_id parent_id,
|
||||
replica::database& db,
|
||||
table_info ti,
|
||||
seastar::condition_variable& cv,
|
||||
tasks::task_manager::task_ptr& current_task,
|
||||
bool exclude_current_version) noexcept
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "table", std::move(keyspace), std::move(table), "", parent_id)
|
||||
, _db(db)
|
||||
, _ti(std::move(ti))
|
||||
, _cv(cv)
|
||||
, _current_task(current_task)
|
||||
, _exclude_current_version(exclude_current_version)
|
||||
{}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "upgrade " + sstables_compaction_task_impl::type();
|
||||
}
|
||||
virtual tasks::is_internal is_internal() const noexcept override;
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
@@ -417,16 +291,12 @@ public:
|
||||
std::vector<sstring> column_families,
|
||||
sstables::compaction_type_options::scrub opts,
|
||||
sstables::compaction_stats& stats) noexcept
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "keyspace", std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
||||
, _db(db)
|
||||
, _column_families(std::move(column_families))
|
||||
, _opts(opts)
|
||||
, _stats(stats)
|
||||
{}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "scrub " + sstables_compaction_task_impl::type();
|
||||
}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
@@ -445,16 +315,14 @@ public:
|
||||
std::vector<sstring> column_families,
|
||||
sstables::compaction_type_options::scrub opts,
|
||||
sstables::compaction_stats& stats) noexcept
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "shard", std::move(keyspace), "", "", parent_id)
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), "", "", parent_id)
|
||||
, _db(db)
|
||||
, _column_families(std::move(column_families))
|
||||
, _opts(opts)
|
||||
, _stats(stats)
|
||||
{}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "scrub " + sstables_compaction_task_impl::type();
|
||||
}
|
||||
virtual tasks::is_internal is_internal() const noexcept override;
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
@@ -472,170 +340,13 @@ public:
|
||||
replica::database& db,
|
||||
sstables::compaction_type_options::scrub opts,
|
||||
sstables::compaction_stats& stats) noexcept
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "table", std::move(keyspace), std::move(table), "", parent_id)
|
||||
: sstables_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), std::move(keyspace), std::move(table), "", parent_id)
|
||||
, _db(db)
|
||||
, _opts(opts)
|
||||
, _stats(stats)
|
||||
{}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "scrub " + sstables_compaction_task_impl::type();
|
||||
}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
|
||||
class reshaping_compaction_task_impl : public compaction_task_impl {
|
||||
public:
|
||||
reshaping_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
tasks::task_id id,
|
||||
unsigned sequence_number,
|
||||
std::string scope,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
std::string entity,
|
||||
tasks::task_id parent_id) noexcept
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(scope), std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
{
|
||||
// FIXME: add progress units
|
||||
}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "reshaping compaction";
|
||||
}
|
||||
protected:
|
||||
virtual future<> run() override = 0;
|
||||
};
|
||||
|
||||
class table_reshaping_compaction_task_impl : public reshaping_compaction_task_impl {
|
||||
private:
|
||||
sharded<sstables::sstable_directory>& _dir;
|
||||
sharded<replica::database>& _db;
|
||||
sstables::reshape_mode _mode;
|
||||
sstables::compaction_sstable_creator_fn _creator;
|
||||
std::function<bool (const sstables::shared_sstable&)> _filter;
|
||||
public:
|
||||
table_reshaping_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
sharded<sstables::sstable_directory>& dir,
|
||||
sharded<replica::database>& db,
|
||||
sstables::reshape_mode mode,
|
||||
sstables::compaction_sstable_creator_fn creator,
|
||||
std::function<bool (const sstables::shared_sstable&)> filter) noexcept
|
||||
: reshaping_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "table", std::move(keyspace), std::move(table), "", tasks::task_id::create_null_id())
|
||||
, _dir(dir)
|
||||
, _db(db)
|
||||
, _mode(mode)
|
||||
, _creator(std::move(creator))
|
||||
, _filter(std::move(filter))
|
||||
{}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
|
||||
class shard_reshaping_compaction_task_impl : public reshaping_compaction_task_impl {
|
||||
private:
|
||||
sstables::sstable_directory& _dir;
|
||||
sharded<replica::database>& _db;
|
||||
sstables::reshape_mode _mode;
|
||||
sstables::compaction_sstable_creator_fn _creator;
|
||||
std::function<bool (const sstables::shared_sstable&)> _filter;
|
||||
uint64_t& _total_shard_size;
|
||||
public:
|
||||
shard_reshaping_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
tasks::task_id parent_id,
|
||||
sstables::sstable_directory& dir,
|
||||
sharded<replica::database>& db,
|
||||
sstables::reshape_mode mode,
|
||||
sstables::compaction_sstable_creator_fn creator,
|
||||
std::function<bool (const sstables::shared_sstable&)> filter,
|
||||
uint64_t& total_shard_size) noexcept
|
||||
: reshaping_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "shard", std::move(keyspace), std::move(table), "", parent_id)
|
||||
, _dir(dir)
|
||||
, _db(db)
|
||||
, _mode(mode)
|
||||
, _creator(std::move(creator))
|
||||
, _filter(std::move(filter))
|
||||
, _total_shard_size(total_shard_size)
|
||||
{}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
|
||||
|
||||
class resharding_compaction_task_impl : public compaction_task_impl {
|
||||
public:
|
||||
resharding_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
tasks::task_id id,
|
||||
unsigned sequence_number,
|
||||
std::string scope,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
std::string entity,
|
||||
tasks::task_id parent_id) noexcept
|
||||
: compaction_task_impl(module, id, sequence_number, std::move(scope), std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
{
|
||||
// FIXME: add progress units
|
||||
}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "resharding compaction";
|
||||
}
|
||||
protected:
|
||||
virtual future<> run() override = 0;
|
||||
};
|
||||
|
||||
class table_resharding_compaction_task_impl : public resharding_compaction_task_impl {
|
||||
private:
|
||||
sharded<sstables::sstable_directory>& _dir;
|
||||
sharded<replica::database>& _db;
|
||||
sstables::compaction_sstable_creator_fn _creator;
|
||||
compaction::owned_ranges_ptr _owned_ranges_ptr;
|
||||
public:
|
||||
table_resharding_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
sharded<sstables::sstable_directory>& dir,
|
||||
sharded<replica::database>& db,
|
||||
sstables::compaction_sstable_creator_fn creator,
|
||||
compaction::owned_ranges_ptr owned_ranges_ptr) noexcept
|
||||
: resharding_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "table", std::move(keyspace), std::move(table), "", tasks::task_id::create_null_id())
|
||||
, _dir(dir)
|
||||
, _db(db)
|
||||
, _creator(std::move(creator))
|
||||
, _owned_ranges_ptr(std::move(owned_ranges_ptr))
|
||||
{}
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
|
||||
class shard_resharding_compaction_task_impl : public resharding_compaction_task_impl {
|
||||
private:
|
||||
sharded<sstables::sstable_directory>& _dir;
|
||||
replica::database& _db;
|
||||
sstables::compaction_sstable_creator_fn _creator;
|
||||
compaction::owned_ranges_ptr _local_owned_ranges_ptr;
|
||||
std::vector<replica::reshard_shard_descriptor>& _destinations;
|
||||
public:
|
||||
shard_resharding_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
tasks::task_id parent_id,
|
||||
sharded<sstables::sstable_directory>& dir,
|
||||
replica::database& db,
|
||||
sstables::compaction_sstable_creator_fn creator,
|
||||
compaction::owned_ranges_ptr local_owned_ranges_ptr,
|
||||
std::vector<replica::reshard_shard_descriptor>& destinations) noexcept
|
||||
: resharding_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "shard", std::move(keyspace), std::move(table), "", parent_id)
|
||||
, _dir(dir)
|
||||
, _db(db)
|
||||
, _creator(std::move(creator))
|
||||
, _local_owned_ranges_ptr(std::move(local_owned_ranges_ptr))
|
||||
, _destinations(destinations)
|
||||
{}
|
||||
virtual tasks::is_internal is_internal() const noexcept override;
|
||||
protected:
|
||||
virtual future<> run() override;
|
||||
};
|
||||
@@ -645,25 +356,4 @@ public:
|
||||
task_manager_module(tasks::task_manager& tm) noexcept : tasks::task_manager::module(tm, "compaction") {}
|
||||
};
|
||||
|
||||
class regular_compaction_task_impl : public compaction_task_impl {
|
||||
public:
|
||||
regular_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
tasks::task_id id,
|
||||
unsigned sequence_number,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
std::string entity,
|
||||
tasks::task_id parent_id) noexcept
|
||||
: compaction_task_impl(module, id, sequence_number, "compaction group", std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||
{
|
||||
// FIXME: add progress units
|
||||
}
|
||||
|
||||
virtual std::string type() const override {
|
||||
return "regular compaction";
|
||||
}
|
||||
protected:
|
||||
virtual future<> run() override = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -26,104 +26,53 @@ time_window_compaction_strategy_state& time_window_compaction_strategy::get_stat
|
||||
return table_s.get_compaction_strategy_state().get<time_window_compaction_strategy_state>();
|
||||
}
|
||||
|
||||
const std::unordered_map<sstring, std::chrono::seconds> time_window_compaction_strategy_options::valid_window_units = {
|
||||
{ "MINUTES", 60s }, { "HOURS", 3600s }, { "DAYS", 86400s }
|
||||
};
|
||||
time_window_compaction_strategy_options::time_window_compaction_strategy_options(const std::map<sstring, sstring>& options) {
|
||||
std::chrono::seconds window_unit = DEFAULT_COMPACTION_WINDOW_UNIT;
|
||||
int window_size = DEFAULT_COMPACTION_WINDOW_SIZE;
|
||||
|
||||
const std::unordered_map<sstring, time_window_compaction_strategy_options::timestamp_resolutions> time_window_compaction_strategy_options::valid_timestamp_resolutions = {
|
||||
{ "MICROSECONDS", timestamp_resolutions::microsecond },
|
||||
{ "MILLISECONDS", timestamp_resolutions::millisecond },
|
||||
};
|
||||
|
||||
static std::chrono::seconds validate_compaction_window_unit(const std::map<sstring, sstring>& options) {
|
||||
std::chrono::seconds window_unit = time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_UNIT;
|
||||
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY);
|
||||
if (tmp_value) {
|
||||
auto valid_window_units_it = time_window_compaction_strategy_options::valid_window_units.find(tmp_value.value());
|
||||
if (valid_window_units_it == time_window_compaction_strategy_options::valid_window_units.end()) {
|
||||
throw exceptions::configuration_exception(fmt::format("Invalid window unit {} for {}", tmp_value.value(), time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY));
|
||||
auto it = options.find(COMPACTION_WINDOW_UNIT_KEY);
|
||||
if (it != options.end()) {
|
||||
auto valid_window_units_it = valid_window_units.find(it->second);
|
||||
if (valid_window_units_it == valid_window_units.end()) {
|
||||
throw exceptions::syntax_exception(sstring("Invalid window unit ") + it->second + " for " + COMPACTION_WINDOW_UNIT_KEY);
|
||||
}
|
||||
window_unit = valid_window_units_it->second;
|
||||
}
|
||||
|
||||
return window_unit;
|
||||
}
|
||||
|
||||
static std::chrono::seconds validate_compaction_window_unit(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
auto window_unit = validate_compaction_window_unit(options);
|
||||
unchecked_options.erase(time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY);
|
||||
return window_unit;
|
||||
}
|
||||
|
||||
static int validate_compaction_window_size(const std::map<sstring, sstring>& options) {
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY);
|
||||
int window_size = cql3::statements::property_definitions::to_long(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, tmp_value, time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_SIZE);
|
||||
it = options.find(COMPACTION_WINDOW_SIZE_KEY);
|
||||
if (it != options.end()) {
|
||||
try {
|
||||
window_size = std::stoi(it->second);
|
||||
} catch (const std::exception& e) {
|
||||
throw exceptions::syntax_exception(sstring("Invalid integer value ") + it->second + " for " + COMPACTION_WINDOW_SIZE_KEY);
|
||||
}
|
||||
}
|
||||
|
||||
if (window_size <= 0) {
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be greater than 1", time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, window_size));
|
||||
throw exceptions::configuration_exception(fmt::format("{} must be greater than 1 for compaction_window_size", window_size));
|
||||
}
|
||||
|
||||
return window_size;
|
||||
}
|
||||
|
||||
static int validate_compaction_window_size(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
int window_size = validate_compaction_window_size(options);
|
||||
unchecked_options.erase(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY);
|
||||
return window_size;
|
||||
}
|
||||
|
||||
static db_clock::duration validate_expired_sstable_check_frequency_seconds(const std::map<sstring, sstring>& options) {
|
||||
db_clock::duration expired_sstable_check_frequency = time_window_compaction_strategy_options::DEFAULT_EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS();
|
||||
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY);
|
||||
if (tmp_value) {
|
||||
try {
|
||||
expired_sstable_check_frequency = std::chrono::seconds(std::stol(tmp_value.value()));
|
||||
} catch (const std::exception& e) {
|
||||
throw exceptions::syntax_exception(fmt::format("Invalid long value {} for {}", tmp_value.value(), time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY));
|
||||
}
|
||||
}
|
||||
|
||||
return expired_sstable_check_frequency;
|
||||
}
|
||||
|
||||
static db_clock::duration validate_expired_sstable_check_frequency_seconds(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
db_clock::duration expired_sstable_check_frequency = validate_expired_sstable_check_frequency_seconds(options);
|
||||
unchecked_options.erase(time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY);
|
||||
return expired_sstable_check_frequency;
|
||||
}
|
||||
|
||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options) {
|
||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = time_window_compaction_strategy_options::timestamp_resolutions::microsecond;
|
||||
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
||||
if (tmp_value) {
|
||||
if (!time_window_compaction_strategy_options::valid_timestamp_resolutions.contains(tmp_value.value())) {
|
||||
throw exceptions::configuration_exception(fmt::format("Invalid timestamp resolution {} for {}", tmp_value.value(), time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY));
|
||||
} else {
|
||||
timestamp_resolution = time_window_compaction_strategy_options::valid_timestamp_resolutions.at(tmp_value.value());
|
||||
}
|
||||
}
|
||||
|
||||
return timestamp_resolution;
|
||||
}
|
||||
|
||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = validate_timestamp_resolution(options);
|
||||
unchecked_options.erase(time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
||||
return timestamp_resolution;
|
||||
}
|
||||
|
||||
time_window_compaction_strategy_options::time_window_compaction_strategy_options(const std::map<sstring, sstring>& options) {
|
||||
auto window_unit = validate_compaction_window_unit(options);
|
||||
int window_size = validate_compaction_window_size(options);
|
||||
|
||||
sstable_window_size = window_size * window_unit;
|
||||
expired_sstable_check_frequency = validate_expired_sstable_check_frequency_seconds(options);
|
||||
timestamp_resolution = validate_timestamp_resolution(options);
|
||||
|
||||
auto it = options.find("enable_optimized_twcs_queries");
|
||||
it = options.find(EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY);
|
||||
if (it != options.end()) {
|
||||
try {
|
||||
expired_sstable_check_frequency = std::chrono::seconds(std::stol(it->second));
|
||||
} catch (const std::exception& e) {
|
||||
throw exceptions::syntax_exception(sstring("Invalid long value ") + it->second + "for " + EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY);
|
||||
}
|
||||
}
|
||||
|
||||
it = options.find(TIMESTAMP_RESOLUTION_KEY);
|
||||
if (it != options.end()) {
|
||||
if (!valid_timestamp_resolutions.contains(it->second)) {
|
||||
throw exceptions::syntax_exception(sstring("Invalid timestamp resolution ") + it->second + "for " + TIMESTAMP_RESOLUTION_KEY);
|
||||
} else {
|
||||
timestamp_resolution = valid_timestamp_resolutions.at(it->second);
|
||||
}
|
||||
}
|
||||
|
||||
it = options.find("enable_optimized_twcs_queries");
|
||||
if (it != options.end() && it->second == "false") {
|
||||
enable_optimized_twcs_queries = false;
|
||||
}
|
||||
@@ -133,29 +82,6 @@ time_window_compaction_strategy_options::time_window_compaction_strategy_options
|
||||
|
||||
time_window_compaction_strategy_options::time_window_compaction_strategy_options(const time_window_compaction_strategy_options&) = default;
|
||||
|
||||
// options is a map of compaction strategy options and their values.
|
||||
// unchecked_options is an analogical map from which already checked options are deleted.
|
||||
// This helps making sure that only allowed options are being set.
|
||||
void time_window_compaction_strategy_options::validate(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
validate_compaction_window_unit(options, unchecked_options);
|
||||
validate_compaction_window_size(options, unchecked_options);
|
||||
validate_expired_sstable_check_frequency_seconds(options, unchecked_options);
|
||||
validate_timestamp_resolution(options, unchecked_options);
|
||||
compaction_strategy_impl::validate_min_max_threshold(options, unchecked_options);
|
||||
|
||||
auto it = options.find("enable_optimized_twcs_queries");
|
||||
if (it != options.end() && it->second != "true" && it->second != "false") {
|
||||
throw exceptions::configuration_exception(fmt::format("enable_optimized_twcs_queries value ({}) must be \"true\" or \"false\"", it->second));
|
||||
}
|
||||
unchecked_options.erase("enable_optimized_twcs_queries");
|
||||
|
||||
it = unchecked_options.find("unsafe_aggressive_sstable_expiration");
|
||||
if (it != unchecked_options.end()) {
|
||||
clogger.warn("unsafe_aggressive_sstable_expiration option is not supported for time window compaction strategy");
|
||||
unchecked_options.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
class classify_by_timestamp {
|
||||
time_window_compaction_strategy_options _options;
|
||||
std::vector<int64_t> _known_windows;
|
||||
@@ -212,7 +138,7 @@ reader_consumer_v2 time_window_compaction_strategy::make_interposer_consumer(con
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
||||
time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const {
|
||||
std::vector<shared_sstable> single_window;
|
||||
std::vector<shared_sstable> multi_window;
|
||||
|
||||
@@ -266,7 +192,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
||||
});
|
||||
multi_window.resize(max_sstables);
|
||||
}
|
||||
compaction_descriptor desc(std::move(multi_window));
|
||||
compaction_descriptor desc(std::move(multi_window), iop);
|
||||
desc.options = compaction_type_options::make_reshape();
|
||||
return desc;
|
||||
}
|
||||
@@ -285,14 +211,14 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
||||
}
|
||||
// reuse STCS reshape logic which will only compact similar-sized files, to increase overall efficiency
|
||||
// when reshaping time buckets containing a huge amount of files
|
||||
auto desc = size_tiered_compaction_strategy(_stcs_options).get_reshaping_job(std::move(ssts), schema, mode);
|
||||
auto desc = size_tiered_compaction_strategy(_stcs_options).get_reshaping_job(std::move(ssts), schema, iop, mode);
|
||||
if (!desc.sstables.empty()) {
|
||||
return desc;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!single_window.empty()) {
|
||||
compaction_descriptor desc(std::move(single_window));
|
||||
compaction_descriptor desc(std::move(single_window), iop);
|
||||
desc.options = compaction_type_options::make_reshape();
|
||||
return desc;
|
||||
}
|
||||
@@ -301,10 +227,9 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
time_window_compaction_strategy::get_sstables_for_compaction(table_state& table_s, strategy_control& control) {
|
||||
time_window_compaction_strategy::get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<shared_sstable> candidates) {
|
||||
auto& state = get_state(table_s);
|
||||
auto compaction_time = gc_clock::now();
|
||||
auto candidates = control.candidates(table_s);
|
||||
|
||||
if (candidates.empty()) {
|
||||
state.estimated_remaining_tasks = 0;
|
||||
@@ -319,7 +244,7 @@ time_window_compaction_strategy::get_sstables_for_compaction(table_state& table_
|
||||
auto expired = table_s.fully_expired_sstables(candidates, compaction_time);
|
||||
if (!expired.empty()) {
|
||||
clogger.debug("[{}] Going to compact {} expired sstables", fmt::ptr(this), expired.size());
|
||||
return compaction_descriptor(has_only_fully_expired::yes, std::vector<shared_sstable>(expired.begin(), expired.end()));
|
||||
return compaction_descriptor(has_only_fully_expired::yes, std::vector<shared_sstable>(expired.begin(), expired.end()), service::get_local_compaction_priority());
|
||||
}
|
||||
// Keep checking for fully_expired_sstables until we don't find
|
||||
// any among the candidates, meaning they are either already compacted
|
||||
@@ -331,7 +256,7 @@ time_window_compaction_strategy::get_sstables_for_compaction(table_state& table_
|
||||
|
||||
auto compaction_candidates = get_next_non_expired_sstables(table_s, control, std::move(candidates), compaction_time);
|
||||
clogger.debug("[{}] Going to compact {} non-expired sstables", fmt::ptr(this), compaction_candidates.size());
|
||||
return compaction_descriptor(std::move(compaction_candidates));
|
||||
return compaction_descriptor(std::move(compaction_candidates), service::get_local_compaction_priority());
|
||||
}
|
||||
|
||||
time_window_compaction_strategy::bucket_compaction_mode
|
||||
@@ -366,7 +291,7 @@ time_window_compaction_strategy::get_next_non_expired_sstables(table_state& tabl
|
||||
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
|
||||
// ratio is greater than threshold.
|
||||
auto e = boost::range::remove_if(non_expiring_sstables, [this, compaction_time, &table_s] (const shared_sstable& sst) -> bool {
|
||||
return !worth_dropping_tombstones(sst, compaction_time, table_s);
|
||||
return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
|
||||
});
|
||||
non_expiring_sstables.erase(e, non_expiring_sstables.end());
|
||||
if (non_expiring_sstables.empty()) {
|
||||
@@ -503,7 +428,6 @@ void time_window_compaction_strategy::update_estimated_compaction_by_tasks(time_
|
||||
break;
|
||||
case bucket_compaction_mode::major:
|
||||
n++;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -11,9 +11,12 @@
|
||||
#pragma once
|
||||
|
||||
#include "compaction_strategy_impl.hh"
|
||||
#include "compaction.hh"
|
||||
#include "size_tiered_compaction_strategy.hh"
|
||||
#include "timestamp.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "sstables/shared_sstable.hh"
|
||||
#include "service/priority_manager.hh"
|
||||
|
||||
namespace sstables {
|
||||
|
||||
@@ -33,15 +36,18 @@ public:
|
||||
static constexpr auto COMPACTION_WINDOW_UNIT_KEY = "compaction_window_unit";
|
||||
static constexpr auto COMPACTION_WINDOW_SIZE_KEY = "compaction_window_size";
|
||||
static constexpr auto EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY = "expired_sstable_check_frequency_seconds";
|
||||
|
||||
static const std::unordered_map<sstring, std::chrono::seconds> valid_window_units;
|
||||
private:
|
||||
const std::unordered_map<sstring, std::chrono::seconds> valid_window_units = { { "MINUTES", 60s }, { "HOURS", 3600s }, { "DAYS", 86400s } };
|
||||
|
||||
enum class timestamp_resolutions {
|
||||
microsecond,
|
||||
millisecond,
|
||||
};
|
||||
static const std::unordered_map<sstring, timestamp_resolutions> valid_timestamp_resolutions;
|
||||
private:
|
||||
const std::unordered_map<sstring, timestamp_resolutions> valid_timestamp_resolutions = {
|
||||
{ "MICROSECONDS", timestamp_resolutions::microsecond },
|
||||
{ "MILLISECONDS", timestamp_resolutions::millisecond },
|
||||
};
|
||||
|
||||
std::chrono::seconds sstable_window_size = DEFAULT_COMPACTION_WINDOW_UNIT * DEFAULT_COMPACTION_WINDOW_SIZE;
|
||||
db_clock::duration expired_sstable_check_frequency = DEFAULT_EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS();
|
||||
timestamp_resolutions timestamp_resolution = timestamp_resolutions::microsecond;
|
||||
@@ -51,8 +57,6 @@ public:
|
||||
time_window_compaction_strategy_options(time_window_compaction_strategy_options&&);
|
||||
time_window_compaction_strategy_options(const std::map<sstring, sstring>& options);
|
||||
|
||||
static void validate(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options);
|
||||
public:
|
||||
std::chrono::seconds get_sstable_window_size() const { return sstable_window_size; }
|
||||
|
||||
friend class time_window_compaction_strategy;
|
||||
@@ -83,11 +87,9 @@ public:
|
||||
enum class bucket_compaction_mode { none, size_tiered, major };
|
||||
public:
|
||||
time_window_compaction_strategy(const std::map<sstring, sstring>& options);
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control) override;
|
||||
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<shared_sstable> candidates) override;
|
||||
|
||||
virtual std::vector<compaction_descriptor> get_cleanup_compaction_jobs(table_state& table_s, std::vector<shared_sstable> candidates) const override;
|
||||
|
||||
static void validate_options(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options);
|
||||
private:
|
||||
time_window_compaction_strategy_state& get_state(table_state& table_s) const;
|
||||
|
||||
@@ -170,7 +172,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user