Compare commits
144 Commits
next-4.5
...
branch-4.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
92effccf52 | ||
|
|
7357529834 | ||
|
|
3dd7874f08 | ||
|
|
1bf218c29e | ||
|
|
89c47a44dc | ||
|
|
dd93f297c1 | ||
|
|
b0b2606a8c | ||
|
|
6de458e915 | ||
|
|
b6aa5ab2d4 | ||
|
|
08cbd180ff | ||
|
|
693c7b300a | ||
|
|
2e7f618632 | ||
|
|
5cd698c89d | ||
|
|
482fa83a0e | ||
|
|
cabb7fbd3b | ||
|
|
4d1c83a4e8 | ||
|
|
7da9884d09 | ||
|
|
b4242f01a8 | ||
|
|
27cd231f61 | ||
|
|
030419d5ed | ||
|
|
0d1362fc31 | ||
|
|
0888aa1717 | ||
|
|
690a96ff54 | ||
|
|
38cdf30a35 | ||
|
|
61b71e4da0 | ||
|
|
e4b42e622e | ||
|
|
e625144d6e | ||
|
|
76ec7513f1 | ||
|
|
f36f7035c8 | ||
|
|
709e934164 | ||
|
|
13428d56f6 | ||
|
|
2c1f5e5225 | ||
|
|
9ae3edb102 | ||
|
|
11851fa4d9 | ||
|
|
1a56e41f44 | ||
|
|
d737d56a08 | ||
|
|
e1c993fc13 | ||
|
|
0a6e38bf18 | ||
|
|
55bca74e90 | ||
|
|
162d466034 | ||
|
|
3e6d8c3fa7 | ||
|
|
5d3ff1e8a1 | ||
|
|
5358eaf1d6 | ||
|
|
e78b96ee49 | ||
|
|
add245a27e | ||
|
|
108f56c6ed | ||
|
|
d01ce491c0 | ||
|
|
7b2f65191c | ||
|
|
add5ffa787 | ||
|
|
32a1f2dcd9 | ||
|
|
f2072665d1 | ||
|
|
beb2bcb8bd | ||
|
|
8255b7984d | ||
|
|
28f5e0bd20 | ||
|
|
09f3bb93a3 | ||
|
|
76642eb00d | ||
|
|
a60f394d9a | ||
|
|
f2af68850c | ||
|
|
c7781f8c9e | ||
|
|
8f37924694 | ||
|
|
8588eef807 | ||
|
|
c50a2898cf | ||
|
|
44f7251809 | ||
|
|
fc070d3dc6 | ||
|
|
901784e122 | ||
|
|
2ccda04d57 | ||
|
|
e8facb1932 | ||
|
|
6f338e7656 | ||
|
|
7bb9230cfa | ||
|
|
2898e98733 | ||
|
|
2796b0050d | ||
|
|
6bc005643e | ||
|
|
d591ff5422 | ||
|
|
acb1c3eebf | ||
|
|
a04242ea62 | ||
|
|
7131c7c523 | ||
|
|
6af7cf8a39 | ||
|
|
e2d4940b6d | ||
|
|
09f9ff3f96 | ||
|
|
d671185828 | ||
|
|
8d1784805a | ||
|
|
1d4ce229eb | ||
|
|
ba9897a34e | ||
|
|
5cdc1fa662 | ||
|
|
81347037d3 | ||
|
|
49c3b812b9 | ||
|
|
6ffd23a957 | ||
|
|
a0b78956e8 | ||
|
|
74941f67e6 | ||
|
|
8c9c0807ef | ||
|
|
f316e1db54 | ||
|
|
675db3e65e | ||
|
|
5a45c2b947 | ||
|
|
b446cbad97 | ||
|
|
da2c5fd549 | ||
|
|
b44b814d94 | ||
|
|
46650adcd0 | ||
|
|
baeddc3cb5 | ||
|
|
33831c49cc | ||
|
|
47fc8389fb | ||
|
|
a7a979b794 | ||
|
|
413e03ce5e | ||
|
|
000585522e | ||
|
|
47b121130a | ||
|
|
15f55141ec | ||
|
|
69fbeaa27e | ||
|
|
a366de2a63 | ||
|
|
5bd52e4dba | ||
|
|
8a3a69bc3e | ||
|
|
50c01f7331 | ||
|
|
ecfe466e7b | ||
|
|
69e5caadb6 | ||
|
|
0ff3c0dcb5 | ||
|
|
2148a194c2 | ||
|
|
77ab7b1221 | ||
|
|
59bcd7f029 | ||
|
|
bc5008b165 | ||
|
|
dd7e3d3eab | ||
|
|
3b617164dc | ||
|
|
bb99d7ced6 | ||
|
|
9877246251 | ||
|
|
d966e2d500 | ||
|
|
81831d93d2 | ||
|
|
542a7d28a3 | ||
|
|
1310e6cb48 | ||
|
|
99a6ecb25d | ||
|
|
bc922a743f | ||
|
|
1ec4f50e3c | ||
|
|
9c7ff01c5d | ||
|
|
da29b65e04 | ||
|
|
8c3e8350d6 | ||
|
|
708588bf8b | ||
|
|
b2271800a5 | ||
|
|
209c3512e7 | ||
|
|
4896ce0fd4 | ||
|
|
9d84b1f13d | ||
|
|
a8e372bf94 | ||
|
|
17e5ac9ab1 | ||
|
|
d1d968c6e9 | ||
|
|
e186f66bfe | ||
|
|
78a39e8364 | ||
|
|
bbef05ae3c | ||
|
|
6f324cb732 | ||
|
|
239499a35a |
6
.github/CODEOWNERS
vendored
6
.github/CODEOWNERS
vendored
@@ -79,9 +79,3 @@ db/hints/* @haaawk @piodul @vladzcloudius
|
||||
# REDIS
|
||||
redis/* @nyh @syuu1228
|
||||
redis-test/* @nyh @syuu1228
|
||||
|
||||
# READERS
|
||||
reader_* @denesb
|
||||
querier* @denesb
|
||||
test/boost/mutation_reader_test.cc @denesb
|
||||
test/boost/querier_cache_test.cc @denesb
|
||||
|
||||
33
.github/workflows/pages.yml
vendored
33
.github/workflows/pages.yml
vendored
@@ -1,33 +0,0 @@
|
||||
name: "CI Docs"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- 'docs/**'
|
||||
jobs:
|
||||
release:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
LATEST_VERSION: master
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-depth: 0
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v1
|
||||
with:
|
||||
python-version: 3.7
|
||||
- name: Build docs
|
||||
run: |
|
||||
export PATH=$PATH:~/.local/bin
|
||||
cd docs
|
||||
make multiversion
|
||||
- name: Deploy
|
||||
run : ./docs/_utils/deploy.sh
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -25,5 +25,3 @@ tags
|
||||
testlog
|
||||
test/*/*.reject
|
||||
.vscode
|
||||
docs/_build
|
||||
docs/poetry.lock
|
||||
|
||||
829
CMakeLists.txt
829
CMakeLists.txt
@@ -1,5 +1,8 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
##
|
||||
## For best results, first compile the project using the Ninja build-system.
|
||||
##
|
||||
|
||||
cmake_minimum_required(VERSION 3.7)
|
||||
project(scylla)
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
@@ -17,740 +20,138 @@ else()
|
||||
set(BUILD_TYPE "release")
|
||||
endif()
|
||||
|
||||
function(default_target_arch arch)
|
||||
set(x86_instruction_sets i386 i686 x86_64)
|
||||
if(CMAKE_SYSTEM_PROCESSOR IN_LIST x86_instruction_sets)
|
||||
set(${arch} "westmere" PARENT_SCOPE)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR EQUAL "aarch64")
|
||||
set(${arch} "armv8-a+crc+crypto" PARENT_SCOPE)
|
||||
else()
|
||||
set(${arch} "" PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
default_target_arch(target_arch)
|
||||
if(target_arch)
|
||||
set(target_arch_flag "-march=${target_arch}")
|
||||
if (NOT DEFINED FOR_IDE AND NOT DEFINED ENV{FOR_IDE} AND NOT DEFINED ENV{CLION_IDE})
|
||||
message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in IDEs, please define FOR_IDE to acknowledge this.")
|
||||
endif()
|
||||
|
||||
# Configure Seastar compile options to align with Scylla
|
||||
set(Seastar_CXX_FLAGS -fcoroutines ${target_arch_flag} CACHE INTERNAL "" FORCE)
|
||||
set(Seastar_CXX_DIALECT gnu++20 CACHE INTERNAL "" FORCE)
|
||||
# These paths are always available, since they're included in the repository. Additional DPDK headers are placed while
|
||||
# Seastar is built, and are captured in `SEASTAR_INCLUDE_DIRS` through parsing the Seastar pkg-config file (below).
|
||||
set(SEASTAR_DPDK_INCLUDE_DIRS
|
||||
seastar/dpdk/lib/librte_eal/common/include
|
||||
seastar/dpdk/lib/librte_eal/common/include/generic
|
||||
seastar/dpdk/lib/librte_eal/common/include/x86
|
||||
seastar/dpdk/lib/librte_ether)
|
||||
|
||||
add_subdirectory(seastar)
|
||||
add_subdirectory(abseil)
|
||||
# Exclude absl::strerror from the default "all" target since it's not
|
||||
# used in Scylla build and, moreover, makes use of deprecated glibc APIs,
|
||||
# such as sys_nerr, which are not exposed from "stdio.h" since glibc 2.32,
|
||||
# which happens to be the case for recent Fedora distribution versions.
|
||||
#
|
||||
# Need to use the internal "absl_strerror" target name instead of namespaced
|
||||
# variant because `set_target_properties` does not understand the latter form,
|
||||
# unfortunately.
|
||||
set_target_properties(absl_strerror PROPERTIES EXCLUDE_FROM_ALL TRUE)
|
||||
find_package(PkgConfig REQUIRED)
|
||||
|
||||
# System libraries dependencies
|
||||
find_package(Boost COMPONENTS filesystem program_options system thread regex REQUIRED)
|
||||
find_package(Lua REQUIRED)
|
||||
find_package(ZLIB REQUIRED)
|
||||
find_package(ICU COMPONENTS uc REQUIRED)
|
||||
set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/build/${BUILD_TYPE}/seastar:$ENV{PKG_CONFIG_PATH}")
|
||||
pkg_check_modules(SEASTAR seastar)
|
||||
|
||||
set(scylla_build_dir "${CMAKE_BINARY_DIR}/build/${BUILD_TYPE}")
|
||||
set(scylla_gen_build_dir "${scylla_build_dir}/gen")
|
||||
file(MAKE_DIRECTORY "${scylla_build_dir}" "${scylla_gen_build_dir}")
|
||||
if(NOT SEASTAR_INCLUDE_DIRS)
|
||||
# Default value. A more accurate list is populated through `pkg-config` below if `seastar.pc` is available.
|
||||
set(SEASTAR_INCLUDE_DIRS "seastar/include")
|
||||
endif()
|
||||
|
||||
# Place libraries, executables and archives in ${buildroot}/build/${mode}/
|
||||
foreach(mode RUNTIME LIBRARY ARCHIVE)
|
||||
set(CMAKE_${mode}_OUTPUT_DIRECTORY "${scylla_build_dir}")
|
||||
endforeach()
|
||||
find_package(Boost COMPONENTS filesystem program_options system thread)
|
||||
|
||||
# Generate C++ source files from thrift definitions
|
||||
function(scylla_generate_thrift)
|
||||
set(one_value_args TARGET VAR IN_FILE OUT_DIR SERVICE)
|
||||
cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
|
||||
##
|
||||
## Populate the names of all source and header files in the indicated paths in a designated variable.
|
||||
##
|
||||
## When RECURSIVE is specified, directories are traversed recursively.
|
||||
##
|
||||
## Use: scan_scylla_source_directories(VAR my_result_var [RECURSIVE] PATHS [path1 path2 ...])
|
||||
##
|
||||
function (scan_scylla_source_directories)
|
||||
set(options RECURSIVE)
|
||||
set(oneValueArgs VAR)
|
||||
set(multiValueArgs PATHS)
|
||||
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
|
||||
|
||||
get_filename_component(in_file_name ${args_IN_FILE} NAME_WE)
|
||||
set(globs "")
|
||||
|
||||
set(aux_out_file_name ${args_OUT_DIR}/${in_file_name})
|
||||
set(outputs
|
||||
${aux_out_file_name}_types.cpp
|
||||
${aux_out_file_name}_types.h
|
||||
${aux_out_file_name}_constants.cpp
|
||||
${aux_out_file_name}_constants.h
|
||||
${args_OUT_DIR}/${args_SERVICE}.cpp
|
||||
${args_OUT_DIR}/${args_SERVICE}.h)
|
||||
foreach (dir ${args_PATHS})
|
||||
list(APPEND globs "${dir}/*.cc" "${dir}/*.hh")
|
||||
endforeach()
|
||||
|
||||
add_custom_command(
|
||||
DEPENDS
|
||||
${args_IN_FILE}
|
||||
thrift
|
||||
OUTPUT ${outputs}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${args_OUT_DIR}
|
||||
COMMAND thrift -gen cpp:cob_style,no_skeleton -out "${args_OUT_DIR}" "${args_IN_FILE}")
|
||||
if (args_RECURSIVE)
|
||||
set(glob_kind GLOB_RECURSE)
|
||||
else()
|
||||
set(glob_kind GLOB)
|
||||
endif()
|
||||
|
||||
add_custom_target(${args_TARGET}
|
||||
DEPENDS ${outputs})
|
||||
file(${glob_kind} var
|
||||
${globs})
|
||||
|
||||
set(${args_VAR} ${outputs} PARENT_SCOPE)
|
||||
set(${args_VAR} ${var} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
scylla_generate_thrift(
|
||||
TARGET scylla_thrift_gen_cassandra
|
||||
VAR scylla_thrift_gen_cassandra_files
|
||||
IN_FILE interface/cassandra.thrift
|
||||
OUT_DIR ${scylla_gen_build_dir}
|
||||
SERVICE Cassandra)
|
||||
## Although Seastar is an external project, it is common enough to explore the sources while doing
|
||||
## Scylla development that we'll treat the Seastar sources as part of this project for easier navigation.
|
||||
scan_scylla_source_directories(
|
||||
VAR SEASTAR_SOURCE_FILES
|
||||
RECURSIVE
|
||||
|
||||
# Parse antlr3 grammar files and generate C++ sources
|
||||
function(scylla_generate_antlr3)
|
||||
set(one_value_args TARGET VAR IN_FILE OUT_DIR)
|
||||
cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
|
||||
PATHS
|
||||
seastar/core
|
||||
seastar/http
|
||||
seastar/json
|
||||
seastar/net
|
||||
seastar/rpc
|
||||
seastar/testing
|
||||
seastar/util)
|
||||
|
||||
get_filename_component(in_file_pure_name ${args_IN_FILE} NAME)
|
||||
get_filename_component(stem ${in_file_pure_name} NAME_WE)
|
||||
scan_scylla_source_directories(
|
||||
VAR SCYLLA_ROOT_SOURCE_FILES
|
||||
PATHS .)
|
||||
|
||||
set(outputs
|
||||
"${args_OUT_DIR}/${stem}Lexer.hpp"
|
||||
"${args_OUT_DIR}/${stem}Lexer.cpp"
|
||||
"${args_OUT_DIR}/${stem}Parser.hpp"
|
||||
"${args_OUT_DIR}/${stem}Parser.cpp")
|
||||
scan_scylla_source_directories(
|
||||
VAR SCYLLA_SUB_SOURCE_FILES
|
||||
RECURSIVE
|
||||
|
||||
add_custom_command(
|
||||
DEPENDS
|
||||
${args_IN_FILE}
|
||||
OUTPUT ${outputs}
|
||||
# Remove #ifdef'ed code from the grammar source code
|
||||
COMMAND sed -e "/^#if 0/,/^#endif/d" "${args_IN_FILE}" > "${args_OUT_DIR}/${in_file_pure_name}"
|
||||
COMMAND antlr3 "${args_OUT_DIR}/${in_file_pure_name}"
|
||||
# We replace many local `ExceptionBaseType* ex` variables with a single function-scope one.
|
||||
# Because we add such a variable to every function, and because `ExceptionBaseType` is not a global
|
||||
# name, we also add a global typedef to avoid compilation errors.
|
||||
COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Lexer.hpp"
|
||||
COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Lexer.cpp"
|
||||
COMMAND sed -i -e "/^.*On :.*$/d" "${args_OUT_DIR}/${stem}Parser.hpp"
|
||||
COMMAND sed -i
|
||||
-e "s/^\\( *\\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$/\\1const \\2/"
|
||||
-e "/^.*On :.*$/d"
|
||||
-e "1i using ExceptionBaseType = int;"
|
||||
-e "s/^{/{ ExceptionBaseType\\* ex = nullptr;/; s/ExceptionBaseType\\* ex = new/ex = new/; s/exceptions::syntax_exception e/exceptions::syntax_exception\\& e/"
|
||||
"${args_OUT_DIR}/${stem}Parser.cpp"
|
||||
VERBATIM)
|
||||
PATHS
|
||||
api
|
||||
auth
|
||||
cql3
|
||||
db
|
||||
dht
|
||||
exceptions
|
||||
gms
|
||||
index
|
||||
io
|
||||
locator
|
||||
message
|
||||
raft
|
||||
repair
|
||||
service
|
||||
sstables
|
||||
streaming
|
||||
test
|
||||
thrift
|
||||
tracing
|
||||
transport
|
||||
utils)
|
||||
|
||||
add_custom_target(${args_TARGET}
|
||||
DEPENDS ${outputs})
|
||||
scan_scylla_source_directories(
|
||||
VAR SCYLLA_GEN_SOURCE_FILES
|
||||
RECURSIVE
|
||||
PATHS build/${BUILD_TYPE}/gen)
|
||||
|
||||
set(${args_VAR} ${outputs} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
set(antlr3_grammar_files
|
||||
cql3/Cql.g
|
||||
alternator/expressions.g)
|
||||
|
||||
set(antlr3_gen_files)
|
||||
|
||||
foreach(f ${antlr3_grammar_files})
|
||||
get_filename_component(grammar_file_name "${f}" NAME_WE)
|
||||
get_filename_component(f_dir "${f}" DIRECTORY)
|
||||
scylla_generate_antlr3(
|
||||
TARGET scylla_antlr3_gen_${grammar_file_name}
|
||||
VAR scylla_antlr3_gen_${grammar_file_name}_files
|
||||
IN_FILE ${f}
|
||||
OUT_DIR ${scylla_gen_build_dir}/${f_dir})
|
||||
list(APPEND antlr3_gen_files "${scylla_antlr3_gen_${grammar_file_name}_files}")
|
||||
endforeach()
|
||||
|
||||
# Generate C++ sources from ragel grammar files
|
||||
seastar_generate_ragel(
|
||||
TARGET scylla_ragel_gen_protocol_parser
|
||||
VAR scylla_ragel_gen_protocol_parser_file
|
||||
IN_FILE redis/protocol_parser.rl
|
||||
OUT_FILE ${scylla_gen_build_dir}/redis/protocol_parser.hh)
|
||||
|
||||
# Generate C++ sources from Swagger definitions
|
||||
set(swagger_files
|
||||
api/api-doc/cache_service.json
|
||||
api/api-doc/collectd.json
|
||||
api/api-doc/column_family.json
|
||||
api/api-doc/commitlog.json
|
||||
api/api-doc/compaction_manager.json
|
||||
api/api-doc/config.json
|
||||
api/api-doc/endpoint_snitch_info.json
|
||||
api/api-doc/error_injection.json
|
||||
api/api-doc/failure_detector.json
|
||||
api/api-doc/gossiper.json
|
||||
api/api-doc/hinted_handoff.json
|
||||
api/api-doc/lsa.json
|
||||
api/api-doc/messaging_service.json
|
||||
api/api-doc/storage_proxy.json
|
||||
api/api-doc/storage_service.json
|
||||
api/api-doc/stream_manager.json
|
||||
api/api-doc/system.json
|
||||
api/api-doc/utils.json)
|
||||
|
||||
set(swagger_gen_files)
|
||||
|
||||
foreach(f ${swagger_files})
|
||||
get_filename_component(fname "${f}" NAME_WE)
|
||||
get_filename_component(dir "${f}" DIRECTORY)
|
||||
seastar_generate_swagger(
|
||||
TARGET scylla_swagger_gen_${fname}
|
||||
VAR scylla_swagger_gen_${fname}_files
|
||||
IN_FILE "${f}"
|
||||
OUT_DIR "${scylla_gen_build_dir}/${dir}")
|
||||
list(APPEND swagger_gen_files "${scylla_swagger_gen_${fname}_files}")
|
||||
endforeach()
|
||||
|
||||
# Create C++ bindings for IDL serializers
|
||||
function(scylla_generate_idl_serializer)
|
||||
set(one_value_args TARGET VAR IN_FILE OUT_FILE)
|
||||
cmake_parse_arguments(args "" "${one_value_args}" "" ${ARGN})
|
||||
get_filename_component(out_dir ${args_OUT_FILE} DIRECTORY)
|
||||
set(idl_compiler "${CMAKE_SOURCE_DIR}/idl-compiler.py")
|
||||
|
||||
find_package(Python3 COMPONENTS Interpreter)
|
||||
|
||||
add_custom_command(
|
||||
DEPENDS
|
||||
${args_IN_FILE}
|
||||
${idl_compiler}
|
||||
OUTPUT ${args_OUT_FILE}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${out_dir}
|
||||
COMMAND Python3::Interpreter ${idl_compiler} --ns ser -f ${args_IN_FILE} -o ${args_OUT_FILE})
|
||||
|
||||
add_custom_target(${args_TARGET}
|
||||
DEPENDS ${args_OUT_FILE})
|
||||
|
||||
set(${args_VAR} ${args_OUT_FILE} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
set(idl_serializers
|
||||
idl/cache_temperature.idl.hh
|
||||
idl/commitlog.idl.hh
|
||||
idl/consistency_level.idl.hh
|
||||
idl/frozen_mutation.idl.hh
|
||||
idl/frozen_schema.idl.hh
|
||||
idl/gossip_digest.idl.hh
|
||||
idl/idl_test.idl.hh
|
||||
idl/keys.idl.hh
|
||||
idl/messaging_service.idl.hh
|
||||
idl/mutation.idl.hh
|
||||
idl/paging_state.idl.hh
|
||||
idl/partition_checksum.idl.hh
|
||||
idl/paxos.idl.hh
|
||||
idl/query.idl.hh
|
||||
idl/range.idl.hh
|
||||
idl/read_command.idl.hh
|
||||
idl/reconcilable_result.idl.hh
|
||||
idl/replay_position.idl.hh
|
||||
idl/result.idl.hh
|
||||
idl/ring_position.idl.hh
|
||||
idl/streaming.idl.hh
|
||||
idl/token.idl.hh
|
||||
idl/tracing.idl.hh
|
||||
idl/truncation_record.idl.hh
|
||||
idl/uuid.idl.hh
|
||||
idl/view.idl.hh)
|
||||
|
||||
set(idl_gen_files)
|
||||
|
||||
foreach(f ${idl_serializers})
|
||||
get_filename_component(idl_name "${f}" NAME)
|
||||
get_filename_component(idl_target "${idl_name}" NAME_WE)
|
||||
get_filename_component(idl_dir "${f}" DIRECTORY)
|
||||
string(REPLACE ".idl.hh" ".dist.hh" idl_out_hdr_name "${idl_name}")
|
||||
scylla_generate_idl_serializer(
|
||||
TARGET scylla_idl_gen_${idl_target}
|
||||
VAR scylla_idl_gen_${idl_target}_files
|
||||
IN_FILE ${f}
|
||||
OUT_FILE ${scylla_gen_build_dir}/${idl_dir}/${idl_out_hdr_name})
|
||||
list(APPEND idl_gen_files "${scylla_idl_gen_${idl_target}_files}")
|
||||
endforeach()
|
||||
|
||||
set(scylla_sources
|
||||
absl-flat_hash_map.cc
|
||||
alternator/auth.cc
|
||||
alternator/base64.cc
|
||||
alternator/conditions.cc
|
||||
alternator/executor.cc
|
||||
alternator/expressions.cc
|
||||
alternator/serialization.cc
|
||||
alternator/server.cc
|
||||
alternator/stats.cc
|
||||
alternator/streams.cc
|
||||
api/api.cc
|
||||
api/cache_service.cc
|
||||
api/collectd.cc
|
||||
api/column_family.cc
|
||||
api/commitlog.cc
|
||||
api/compaction_manager.cc
|
||||
api/config.cc
|
||||
api/endpoint_snitch.cc
|
||||
api/error_injection.cc
|
||||
api/failure_detector.cc
|
||||
api/gossiper.cc
|
||||
api/hinted_handoff.cc
|
||||
api/lsa.cc
|
||||
api/messaging_service.cc
|
||||
api/storage_proxy.cc
|
||||
api/storage_service.cc
|
||||
api/stream_manager.cc
|
||||
api/system.cc
|
||||
atomic_cell.cc
|
||||
auth/allow_all_authenticator.cc
|
||||
auth/allow_all_authorizer.cc
|
||||
auth/authenticated_user.cc
|
||||
auth/authentication_options.cc
|
||||
auth/authenticator.cc
|
||||
auth/common.cc
|
||||
auth/default_authorizer.cc
|
||||
auth/password_authenticator.cc
|
||||
auth/passwords.cc
|
||||
auth/permission.cc
|
||||
auth/permissions_cache.cc
|
||||
auth/resource.cc
|
||||
auth/role_or_anonymous.cc
|
||||
auth/roles-metadata.cc
|
||||
auth/sasl_challenge.cc
|
||||
auth/service.cc
|
||||
auth/standard_role_manager.cc
|
||||
auth/transitional.cc
|
||||
bytes.cc
|
||||
canonical_mutation.cc
|
||||
cdc/cdc_partitioner.cc
|
||||
cdc/generation.cc
|
||||
cdc/log.cc
|
||||
cdc/metadata.cc
|
||||
cdc/split.cc
|
||||
clocks-impl.cc
|
||||
collection_mutation.cc
|
||||
compress.cc
|
||||
connection_notifier.cc
|
||||
converting_mutation_partition_applier.cc
|
||||
counters.cc
|
||||
cql3/abstract_marker.cc
|
||||
cql3/attributes.cc
|
||||
cql3/cf_name.cc
|
||||
cql3/column_condition.cc
|
||||
cql3/column_identifier.cc
|
||||
cql3/column_specification.cc
|
||||
cql3/constants.cc
|
||||
cql3/cql3_type.cc
|
||||
cql3/expr/expression.cc
|
||||
cql3/functions/aggregate_fcts.cc
|
||||
cql3/functions/castas_fcts.cc
|
||||
cql3/functions/error_injection_fcts.cc
|
||||
cql3/functions/functions.cc
|
||||
cql3/functions/user_function.cc
|
||||
cql3/index_name.cc
|
||||
cql3/keyspace_element_name.cc
|
||||
cql3/lists.cc
|
||||
cql3/maps.cc
|
||||
cql3/operation.cc
|
||||
cql3/query_options.cc
|
||||
cql3/query_processor.cc
|
||||
cql3/relation.cc
|
||||
cql3/restrictions/statement_restrictions.cc
|
||||
cql3/result_set.cc
|
||||
cql3/role_name.cc
|
||||
cql3/selection/abstract_function_selector.cc
|
||||
cql3/selection/selectable.cc
|
||||
cql3/selection/selection.cc
|
||||
cql3/selection/selector.cc
|
||||
cql3/selection/selector_factories.cc
|
||||
cql3/selection/simple_selector.cc
|
||||
cql3/sets.cc
|
||||
cql3/single_column_relation.cc
|
||||
cql3/statements/alter_keyspace_statement.cc
|
||||
cql3/statements/alter_table_statement.cc
|
||||
cql3/statements/alter_type_statement.cc
|
||||
cql3/statements/alter_view_statement.cc
|
||||
cql3/statements/authentication_statement.cc
|
||||
cql3/statements/authorization_statement.cc
|
||||
cql3/statements/batch_statement.cc
|
||||
cql3/statements/cas_request.cc
|
||||
cql3/statements/cf_prop_defs.cc
|
||||
cql3/statements/cf_statement.cc
|
||||
cql3/statements/create_function_statement.cc
|
||||
cql3/statements/create_index_statement.cc
|
||||
cql3/statements/create_keyspace_statement.cc
|
||||
cql3/statements/create_table_statement.cc
|
||||
cql3/statements/create_type_statement.cc
|
||||
cql3/statements/create_view_statement.cc
|
||||
cql3/statements/delete_statement.cc
|
||||
cql3/statements/drop_function_statement.cc
|
||||
cql3/statements/drop_index_statement.cc
|
||||
cql3/statements/drop_keyspace_statement.cc
|
||||
cql3/statements/drop_table_statement.cc
|
||||
cql3/statements/drop_type_statement.cc
|
||||
cql3/statements/drop_view_statement.cc
|
||||
cql3/statements/function_statement.cc
|
||||
cql3/statements/grant_statement.cc
|
||||
cql3/statements/index_prop_defs.cc
|
||||
cql3/statements/index_target.cc
|
||||
cql3/statements/ks_prop_defs.cc
|
||||
cql3/statements/list_permissions_statement.cc
|
||||
cql3/statements/list_users_statement.cc
|
||||
cql3/statements/modification_statement.cc
|
||||
cql3/statements/permission_altering_statement.cc
|
||||
cql3/statements/property_definitions.cc
|
||||
cql3/statements/raw/parsed_statement.cc
|
||||
cql3/statements/revoke_statement.cc
|
||||
cql3/statements/role-management-statements.cc
|
||||
cql3/statements/schema_altering_statement.cc
|
||||
cql3/statements/select_statement.cc
|
||||
cql3/statements/truncate_statement.cc
|
||||
cql3/statements/update_statement.cc
|
||||
cql3/statements/use_statement.cc
|
||||
cql3/token_relation.cc
|
||||
cql3/tuples.cc
|
||||
cql3/type_json.cc
|
||||
cql3/untyped_result_set.cc
|
||||
cql3/update_parameters.cc
|
||||
cql3/user_types.cc
|
||||
cql3/ut_name.cc
|
||||
cql3/util.cc
|
||||
cql3/values.cc
|
||||
cql3/variable_specifications.cc
|
||||
data/cell.cc
|
||||
database.cc
|
||||
db/batchlog_manager.cc
|
||||
db/commitlog/commitlog.cc
|
||||
db/commitlog/commitlog_entry.cc
|
||||
db/commitlog/commitlog_replayer.cc
|
||||
db/config.cc
|
||||
db/consistency_level.cc
|
||||
db/cql_type_parser.cc
|
||||
db/data_listeners.cc
|
||||
db/extensions.cc
|
||||
db/heat_load_balance.cc
|
||||
db/hints/manager.cc
|
||||
db/hints/resource_manager.cc
|
||||
db/large_data_handler.cc
|
||||
db/legacy_schema_migrator.cc
|
||||
db/marshal/type_parser.cc
|
||||
db/schema_tables.cc
|
||||
db/size_estimates_virtual_reader.cc
|
||||
db/snapshot-ctl.cc
|
||||
db/sstables-format-selector.cc
|
||||
db/system_distributed_keyspace.cc
|
||||
db/system_keyspace.cc
|
||||
db/view/row_locking.cc
|
||||
db/view/view.cc
|
||||
db/view/view_update_generator.cc
|
||||
dht/boot_strapper.cc
|
||||
dht/i_partitioner.cc
|
||||
dht/murmur3_partitioner.cc
|
||||
dht/range_streamer.cc
|
||||
dht/token.cc
|
||||
distributed_loader.cc
|
||||
duration.cc
|
||||
exceptions/exceptions.cc
|
||||
flat_mutation_reader.cc
|
||||
frozen_mutation.cc
|
||||
frozen_schema.cc
|
||||
gms/application_state.cc
|
||||
gms/endpoint_state.cc
|
||||
gms/failure_detector.cc
|
||||
gms/feature_service.cc
|
||||
gms/gossip_digest_ack.cc
|
||||
gms/gossip_digest_ack2.cc
|
||||
gms/gossip_digest_syn.cc
|
||||
gms/gossiper.cc
|
||||
gms/inet_address.cc
|
||||
gms/version_generator.cc
|
||||
gms/versioned_value.cc
|
||||
hashers.cc
|
||||
index/secondary_index.cc
|
||||
index/secondary_index_manager.cc
|
||||
init.cc
|
||||
keys.cc
|
||||
lister.cc
|
||||
locator/abstract_replication_strategy.cc
|
||||
locator/ec2_multi_region_snitch.cc
|
||||
locator/ec2_snitch.cc
|
||||
locator/everywhere_replication_strategy.cc
|
||||
locator/gce_snitch.cc
|
||||
locator/gossiping_property_file_snitch.cc
|
||||
locator/local_strategy.cc
|
||||
locator/network_topology_strategy.cc
|
||||
locator/production_snitch_base.cc
|
||||
locator/rack_inferring_snitch.cc
|
||||
locator/simple_snitch.cc
|
||||
locator/simple_strategy.cc
|
||||
locator/snitch_base.cc
|
||||
locator/token_metadata.cc
|
||||
lua.cc
|
||||
main.cc
|
||||
memtable.cc
|
||||
message/messaging_service.cc
|
||||
multishard_mutation_query.cc
|
||||
mutation.cc
|
||||
raft/fsm.cc
|
||||
raft/log.cc
|
||||
raft/progress.cc
|
||||
raft/raft.cc
|
||||
raft/server.cc
|
||||
mutation_fragment.cc
|
||||
mutation_partition.cc
|
||||
mutation_partition_serializer.cc
|
||||
mutation_partition_view.cc
|
||||
mutation_query.cc
|
||||
mutation_reader.cc
|
||||
mutation_writer/multishard_writer.cc
|
||||
mutation_writer/shard_based_splitting_writer.cc
|
||||
mutation_writer/timestamp_based_splitting_writer.cc
|
||||
mutation_writer/feed_writers.cc
|
||||
partition_slice_builder.cc
|
||||
partition_version.cc
|
||||
querier.cc
|
||||
query-result-set.cc
|
||||
query.cc
|
||||
range_tombstone.cc
|
||||
range_tombstone_list.cc
|
||||
reader_concurrency_semaphore.cc
|
||||
redis/abstract_command.cc
|
||||
redis/command_factory.cc
|
||||
redis/commands.cc
|
||||
redis/keyspace_utils.cc
|
||||
redis/lolwut.cc
|
||||
redis/mutation_utils.cc
|
||||
redis/options.cc
|
||||
redis/query_processor.cc
|
||||
redis/query_utils.cc
|
||||
redis/server.cc
|
||||
redis/service.cc
|
||||
redis/stats.cc
|
||||
repair/repair.cc
|
||||
repair/row_level.cc
|
||||
row_cache.cc
|
||||
schema.cc
|
||||
schema_mutations.cc
|
||||
schema_registry.cc
|
||||
service/client_state.cc
|
||||
service/migration_manager.cc
|
||||
service/migration_task.cc
|
||||
service/misc_services.cc
|
||||
service/pager/paging_state.cc
|
||||
service/pager/query_pagers.cc
|
||||
service/paxos/paxos_state.cc
|
||||
service/paxos/prepare_response.cc
|
||||
service/paxos/prepare_summary.cc
|
||||
service/paxos/proposal.cc
|
||||
service/priority_manager.cc
|
||||
service/storage_proxy.cc
|
||||
service/storage_service.cc
|
||||
sstables/compaction.cc
|
||||
sstables/compaction_manager.cc
|
||||
sstables/compaction_strategy.cc
|
||||
sstables/compress.cc
|
||||
sstables/integrity_checked_file_impl.cc
|
||||
sstables/kl/writer.cc
|
||||
sstables/leveled_compaction_strategy.cc
|
||||
sstables/m_format_read_helpers.cc
|
||||
sstables/metadata_collector.cc
|
||||
sstables/mp_row_consumer.cc
|
||||
sstables/mx/writer.cc
|
||||
sstables/partition.cc
|
||||
sstables/prepended_input_stream.cc
|
||||
sstables/random_access_reader.cc
|
||||
sstables/size_tiered_compaction_strategy.cc
|
||||
sstables/sstable_directory.cc
|
||||
sstables/sstable_version.cc
|
||||
sstables/sstables.cc
|
||||
sstables/sstables_manager.cc
|
||||
sstables/time_window_compaction_strategy.cc
|
||||
sstables/writer.cc
|
||||
streaming/progress_info.cc
|
||||
streaming/session_info.cc
|
||||
streaming/stream_coordinator.cc
|
||||
streaming/stream_manager.cc
|
||||
streaming/stream_plan.cc
|
||||
streaming/stream_reason.cc
|
||||
streaming/stream_receive_task.cc
|
||||
streaming/stream_request.cc
|
||||
streaming/stream_result_future.cc
|
||||
streaming/stream_session.cc
|
||||
streaming/stream_session_state.cc
|
||||
streaming/stream_summary.cc
|
||||
streaming/stream_task.cc
|
||||
streaming/stream_transfer_task.cc
|
||||
table.cc
|
||||
table_helper.cc
|
||||
thrift/controller.cc
|
||||
thrift/handler.cc
|
||||
thrift/server.cc
|
||||
thrift/thrift_validation.cc
|
||||
timeout_config.cc
|
||||
tracing/trace_keyspace_helper.cc
|
||||
tracing/trace_state.cc
|
||||
tracing/traced_file.cc
|
||||
tracing/tracing.cc
|
||||
tracing/tracing_backend_registry.cc
|
||||
transport/controller.cc
|
||||
transport/cql_protocol_extension.cc
|
||||
transport/event.cc
|
||||
transport/event_notifier.cc
|
||||
transport/messages/result_message.cc
|
||||
transport/server.cc
|
||||
types.cc
|
||||
unimplemented.cc
|
||||
utils/UUID_gen.cc
|
||||
utils/arch/powerpc/crc32-vpmsum/crc32_wrapper.cc
|
||||
utils/array-search.cc
|
||||
utils/ascii.cc
|
||||
utils/big_decimal.cc
|
||||
utils/bloom_calculations.cc
|
||||
utils/bloom_filter.cc
|
||||
utils/buffer_input_stream.cc
|
||||
utils/build_id.cc
|
||||
utils/config_file.cc
|
||||
utils/directories.cc
|
||||
utils/disk-error-handler.cc
|
||||
utils/dynamic_bitset.cc
|
||||
utils/error_injection.cc
|
||||
utils/exceptions.cc
|
||||
utils/file_lock.cc
|
||||
utils/generation-number.cc
|
||||
utils/gz/crc_combine.cc
|
||||
utils/human_readable.cc
|
||||
utils/i_filter.cc
|
||||
utils/large_bitset.cc
|
||||
utils/like_matcher.cc
|
||||
utils/limiting_data_source.cc
|
||||
utils/logalloc.cc
|
||||
utils/managed_bytes.cc
|
||||
utils/multiprecision_int.cc
|
||||
utils/murmur_hash.cc
|
||||
utils/rate_limiter.cc
|
||||
utils/rjson.cc
|
||||
utils/runtime.cc
|
||||
utils/updateable_value.cc
|
||||
utils/utf8.cc
|
||||
utils/uuid.cc
|
||||
validation.cc
|
||||
vint-serialization.cc
|
||||
zstd.cc
|
||||
release.cc)
|
||||
|
||||
set(scylla_gen_sources
|
||||
"${scylla_thrift_gen_cassandra_files}"
|
||||
"${scylla_ragel_gen_protocol_parser_file}"
|
||||
"${swagger_gen_files}"
|
||||
"${idl_gen_files}"
|
||||
"${antlr3_gen_files}")
|
||||
set(SCYLLA_SOURCE_FILES
|
||||
${SCYLLA_ROOT_SOURCE_FILES}
|
||||
${SCYLLA_GEN_SOURCE_FILES}
|
||||
${SCYLLA_SUB_SOURCE_FILES})
|
||||
|
||||
add_executable(scylla
|
||||
${scylla_sources}
|
||||
${scylla_gen_sources})
|
||||
${SEASTAR_SOURCE_FILES}
|
||||
${SCYLLA_SOURCE_FILES})
|
||||
|
||||
target_link_libraries(scylla PRIVATE
|
||||
seastar
|
||||
# Boost dependencies
|
||||
Boost::filesystem
|
||||
Boost::program_options
|
||||
Boost::system
|
||||
Boost::thread
|
||||
Boost::regex
|
||||
Boost::headers
|
||||
# Abseil libs
|
||||
absl::hashtablez_sampler
|
||||
absl::raw_hash_set
|
||||
absl::synchronization
|
||||
absl::graphcycles_internal
|
||||
absl::stacktrace
|
||||
absl::symbolize
|
||||
absl::debugging_internal
|
||||
absl::demangle_internal
|
||||
absl::time
|
||||
absl::time_zone
|
||||
absl::int128
|
||||
absl::city
|
||||
absl::hash
|
||||
absl::malloc_internal
|
||||
absl::spinlock_wait
|
||||
absl::base
|
||||
absl::dynamic_annotations
|
||||
absl::raw_logging_internal
|
||||
absl::exponential_biased
|
||||
absl::throw_delegate
|
||||
# System libs
|
||||
ZLIB::ZLIB
|
||||
ICU::uc
|
||||
systemd
|
||||
zstd
|
||||
snappy
|
||||
${LUA_LIBRARIES}
|
||||
thrift
|
||||
crypt)
|
||||
# If the Seastar pkg-config information is available, append to the default flags.
|
||||
#
|
||||
# For ease of browsing the source code, we always pretend that DPDK is enabled.
|
||||
target_compile_options(scylla PUBLIC
|
||||
-std=gnu++20
|
||||
-DHAVE_DPDK
|
||||
-DHAVE_HWLOC
|
||||
"${SEASTAR_CFLAGS}")
|
||||
|
||||
target_link_libraries(scylla PRIVATE
|
||||
-Wl,--build-id=sha1 # Force SHA1 build-id generation
|
||||
# TODO: Use lld linker if it's available, otherwise gold, else bfd
|
||||
-fuse-ld=lld)
|
||||
# TODO: patch dynamic linker to match configure.py behavior
|
||||
|
||||
target_compile_options(scylla PRIVATE
|
||||
-std=gnu++20
|
||||
-fcoroutines # TODO: Clang does not have this flag, adjust to both variants
|
||||
${target_arch_flag})
|
||||
# Hacks needed to expose internal APIs for xxhash dependencies
|
||||
target_compile_definitions(scylla PRIVATE XXH_PRIVATE_API HAVE_LZ4_COMPRESS_DEFAULT)
|
||||
|
||||
target_include_directories(scylla PRIVATE
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
libdeflate
|
||||
abseil
|
||||
"${scylla_gen_build_dir}")
|
||||
|
||||
###
|
||||
### Create crc_combine_table helper executable.
|
||||
### Use it to generate crc_combine_table.cc to be used in scylla at build time.
|
||||
###
|
||||
add_executable(crc_combine_table utils/gz/gen_crc_combine_table.cc)
|
||||
target_link_libraries(crc_combine_table PRIVATE seastar)
|
||||
target_include_directories(crc_combine_table PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
target_compile_options(crc_combine_table PRIVATE
|
||||
-std=gnu++20
|
||||
-fcoroutines
|
||||
${target_arch_flag})
|
||||
add_dependencies(scylla crc_combine_table)
|
||||
|
||||
# Generate an additional source file at build time that is needed for Scylla compilation
|
||||
add_custom_command(OUTPUT "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc"
|
||||
COMMAND $<TARGET_FILE:crc_combine_table> > "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc"
|
||||
DEPENDS crc_combine_table)
|
||||
target_sources(scylla PRIVATE "${scylla_gen_build_dir}/utils/gz/crc_combine_table.cc")
|
||||
|
||||
###
|
||||
### Generate version file and supply appropriate compile definitions for release.cc
|
||||
###
|
||||
execute_process(COMMAND ${CMAKE_SOURCE_DIR}/SCYLLA-VERSION-GEN RESULT_VARIABLE scylla_version_gen_res)
|
||||
if(scylla_version_gen_res)
|
||||
message(SEND_ERROR "Version file generation failed. Return code: ${scylla_version_gen_res}")
|
||||
endif()
|
||||
|
||||
file(READ build/SCYLLA-VERSION-FILE scylla_version)
|
||||
string(STRIP "${scylla_version}" scylla_version)
|
||||
|
||||
file(READ build/SCYLLA-RELEASE-FILE scylla_release)
|
||||
string(STRIP "${scylla_release}" scylla_release)
|
||||
|
||||
get_property(release_cdefs SOURCE "${CMAKE_SOURCE_DIR}/release.cc" PROPERTY COMPILE_DEFINITIONS)
|
||||
list(APPEND release_cdefs "SCYLLA_VERSION=\"${scylla_version}\"" "SCYLLA_RELEASE=\"${scylla_release}\"")
|
||||
set_source_files_properties("${CMAKE_SOURCE_DIR}/release.cc" PROPERTIES COMPILE_DEFINITIONS "${release_cdefs}")
|
||||
|
||||
###
|
||||
### Custom command for building libdeflate. Link the library to scylla.
|
||||
###
|
||||
set(libdeflate_lib "${scylla_build_dir}/libdeflate/libdeflate.a")
|
||||
add_custom_command(OUTPUT "${libdeflate_lib}"
|
||||
COMMAND make -C libdeflate
|
||||
BUILD_DIR=../build/${BUILD_TYPE}/libdeflate/
|
||||
CC=${CMAKE_C_COMPILER}
|
||||
"CFLAGS=${target_arch_flag}"
|
||||
../build/${BUILD_TYPE}/libdeflate//libdeflate.a) # The double slash ("//") is important!
|
||||
# Hack to force generating custom command to produce libdeflate.a
|
||||
add_custom_target(libdeflate DEPENDS "${libdeflate_lib}")
|
||||
target_link_libraries(scylla PRIVATE "${libdeflate_lib}")
|
||||
|
||||
# TODO: create cmake/ directory and move utilities (generate functions etc) there
|
||||
# TODO: Build tests if BUILD_TESTING=on (using CTest module)
|
||||
# The order matters here: prefer the "static" DPDK directories to any dynamic paths from pkg-config. Some files are only
|
||||
# available dynamically, though.
|
||||
target_include_directories(scylla PUBLIC
|
||||
.
|
||||
${SEASTAR_DPDK_INCLUDE_DIRS}
|
||||
${SEASTAR_INCLUDE_DIRS}
|
||||
${Boost_INCLUDE_DIRS}
|
||||
xxhash
|
||||
libdeflate
|
||||
abseil
|
||||
build/${BUILD_TYPE}/gen)
|
||||
|
||||
@@ -1,18 +1,11 @@
|
||||
# Contributing to Scylla
|
||||
# Asking questions or requesting help
|
||||
|
||||
## Asking questions or requesting help
|
||||
Use the [ScyllaDB user mailing list](https://groups.google.com/forum/#!forum/scylladb-users) or the [Slack workspace](http://slack.scylladb.com) for general questions and help.
|
||||
|
||||
Use the [Scylla Users mailing list](https://groups.google.com/g/scylladb-users) or the [Slack workspace](http://slack.scylladb.com) for general questions and help.
|
||||
# Reporting an issue
|
||||
|
||||
Join the [Scylla Developers mailing list](https://groups.google.com/g/scylladb-dev) for deeper technical discussions and to discuss your ideas for contributions.
|
||||
Please use the [Issue Tracker](https://github.com/scylladb/scylla/issues/) to report issues. Fill in as much information as you can in the issue template, especially for performance problems.
|
||||
|
||||
## Reporting an issue
|
||||
# Contributing Code to Scylla
|
||||
|
||||
Please use the [issue tracker](https://github.com/scylladb/scylla/issues/) to report issues or to suggest features. Fill in as much information as you can in the issue template, especially for performance problems.
|
||||
|
||||
## Contributing code to Scylla
|
||||
|
||||
Before you can contribute code to Scylla for the first time, you should sign the [Contributor License Agreement](https://www.scylladb.com/open-source/contributor-agreement/) and send the signed form to cla@scylladb.com. You can then submit your changes as patches to the [scylladb-dev mailing list](https://groups.google.com/forum/#!forum/scylladb-dev) or as a pull request to the [Scylla project on GitHub](https://github.com/scylladb/scylla).
|
||||
If you need help formatting or sending patches, [check out these instructions](https://github.com/scylladb/scylla/wiki/Formatting-and-sending-patches).
|
||||
|
||||
The Scylla C++ source code uses the [Seastar coding style](https://github.com/scylladb/seastar/blob/master/coding-style.md) so please adhere to that in your patches. Note that Scylla code is written with `using namespace seastar`, so you should not explicitly add the `seastar::` prefix to Seastar symbols. You will usually not need to add `using namespace seastar` to new source files, because most Scylla header files have `#include "seastarx.hh"`, which does this.
|
||||
To contribute code to Scylla, you need to sign the [Contributor License Agreement](https://www.scylladb.com/open-source/contributor-agreement/) and send your changes as [patches](https://github.com/scylladb/scylla/wiki/Formatting-and-sending-patches) to the [mailing list](https://groups.google.com/forum/#!forum/scylladb-dev). We don't accept pull requests on GitHub.
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
Dedicated to the memory of Alberto José Araújo, a coworker and a friend.
|
||||
@@ -5,5 +5,3 @@ It includes files from https://github.com/antonblanchard/crc32-vpmsum (author An
|
||||
These files are located in utils/arch/powerpc/crc32-vpmsum. Their license may be found in licenses/LICENSE-crc32-vpmsum.TXT.
|
||||
|
||||
It includes modified code from https://gitbox.apache.org/repos/asf?p=cassandra-dtest.git (owned by The Apache Software Foundation)
|
||||
|
||||
It includes modified tests from https://github.com/etcd-io/etcd.git (owned by The etcd Authors)
|
||||
|
||||
@@ -42,7 +42,7 @@ For further information, please see:
|
||||
* [Docker image build documentation] for information on how to build Docker images.
|
||||
|
||||
[developer documentation]: HACKING.md
|
||||
[build documentation]: docs/guides/building.md
|
||||
[build documentation]: docs/building.md
|
||||
[docker image build documentation]: dist/docker/redhat/README.md
|
||||
|
||||
## Running Scylla
|
||||
@@ -65,7 +65,7 @@ $ ./tools/toolchain/dbuild ./build/release/scylla --help
|
||||
|
||||
## Testing
|
||||
|
||||
See [test.py manual](docs/guides/testing.md).
|
||||
See [test.py manual](docs/testing.md).
|
||||
|
||||
## Scylla APIs and compatibility
|
||||
By default, Scylla is compatible with Apache Cassandra and its APIs - CQL and
|
||||
@@ -78,7 +78,10 @@ and the current compatibility of this feature as well as Scylla-specific extensi
|
||||
|
||||
## Documentation
|
||||
|
||||
Documentation can be found [here](https://scylla.docs.scylladb.com).
|
||||
Documentation can be found in [./docs](./docs) and on the
|
||||
[wiki](https://github.com/scylladb/scylla/wiki). There is currently no clear
|
||||
definition of what goes where, so when looking for something be sure to check
|
||||
both.
|
||||
Seastar documentation can be found [here](http://docs.seastar.io/master/index.html).
|
||||
User documentation can be found [here](https://docs.scylladb.com/).
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
PRODUCT=scylla
|
||||
VERSION=4.5.7
|
||||
VERSION=4.3.7
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
2
abseil
2
abseil
Submodule abseil updated: 9c6a50fdd8...1e3d25b265
@@ -62,14 +62,6 @@ static std::string apply_sha256(std::string_view msg) {
|
||||
return to_hex(hasher.finalize());
|
||||
}
|
||||
|
||||
static std::string apply_sha256(const std::vector<temporary_buffer<char>>& msg) {
|
||||
sha256_hasher hasher;
|
||||
for (const temporary_buffer<char>& buf : msg) {
|
||||
hasher.update(buf.get(), buf.size());
|
||||
}
|
||||
return to_hex(hasher.finalize());
|
||||
}
|
||||
|
||||
static std::string format_time_point(db_clock::time_point tp) {
|
||||
time_t time_point_repr = db_clock::to_time_t(tp);
|
||||
std::string time_point_str;
|
||||
@@ -99,7 +91,7 @@ void check_expiry(std::string_view signature_date) {
|
||||
|
||||
std::string get_signature(std::string_view access_key_id, std::string_view secret_access_key, std::string_view host, std::string_view method,
|
||||
std::string_view orig_datestamp, std::string_view signed_headers_str, const std::map<std::string_view, std::string_view>& signed_headers_map,
|
||||
const std::vector<temporary_buffer<char>>& body_content, std::string_view region, std::string_view service, std::string_view query_string) {
|
||||
std::string_view body_content, std::string_view region, std::string_view service, std::string_view query_string) {
|
||||
auto amz_date_it = signed_headers_map.find("x-amz-date");
|
||||
if (amz_date_it == signed_headers_map.end()) {
|
||||
throw api_error::invalid_signature("X-Amz-Date header is mandatory for signature verification");
|
||||
@@ -137,7 +129,8 @@ future<std::string> get_key_from_roles(cql3::query_processor& qp, std::string us
|
||||
auth::meta::roles_table::qualified_name, auth::meta::roles_table::role_col_name);
|
||||
|
||||
auto cl = auth::password_authenticator::consistency_for_user(username);
|
||||
return qp.execute_internal(query, cl, auth::internal_distributed_query_state(), {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||
auto& timeout = auth::internal_distributed_timeout_config();
|
||||
return qp.execute_internal(query, cl, timeout, {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||
auto res = f.get0();
|
||||
auto salted_hash = std::optional<sstring>();
|
||||
if (res->empty()) {
|
||||
|
||||
@@ -39,7 +39,7 @@ using key_cache = utils::loading_cache<std::string, std::string>;
|
||||
|
||||
std::string get_signature(std::string_view access_key_id, std::string_view secret_access_key, std::string_view host, std::string_view method,
|
||||
std::string_view orig_datestamp, std::string_view signed_headers_str, const std::map<std::string_view, std::string_view>& signed_headers_map,
|
||||
const std::vector<temporary_buffer<char>>& body_content, std::string_view region, std::string_view service, std::string_view query_string);
|
||||
std::string_view body_content, std::string_view region, std::string_view service, std::string_view query_string);
|
||||
|
||||
future<std::string> get_key_from_roles(cql3::query_processor& qp, std::string username);
|
||||
|
||||
|
||||
@@ -59,9 +59,6 @@ public:
|
||||
static api_error invalid_signature(std::string msg) {
|
||||
return api_error("InvalidSignatureException", std::move(msg));
|
||||
}
|
||||
static api_error missing_authentication_token(std::string msg) {
|
||||
return api_error("MissingAuthenticationTokenException", std::move(msg));
|
||||
}
|
||||
static api_error unrecognized_client(std::string msg) {
|
||||
return api_error("UnrecognizedClientException", std::move(msg));
|
||||
}
|
||||
@@ -80,9 +77,6 @@ public:
|
||||
static api_error trimmed_data_access_exception(std::string msg) {
|
||||
return api_error("TrimmedDataAccessException", std::move(msg));
|
||||
}
|
||||
static api_error request_limit_exceeded(std::string msg) {
|
||||
return api_error("RequestLimitExceeded", std::move(msg));
|
||||
}
|
||||
static api_error internal(std::string msg) {
|
||||
return api_error("InternalServerError", std::move(msg), reply::status_type::internal_server_error);
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
#include "schema.hh"
|
||||
#include "alternator/tags_extension.hh"
|
||||
#include "alternator/rmw_operation.hh"
|
||||
#include <seastar/core/coroutine.hh>
|
||||
|
||||
#include <boost/range/adaptors.hpp>
|
||||
|
||||
logging::logger elogger("alternator-executor");
|
||||
@@ -202,7 +202,7 @@ static schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& r
|
||||
if (!schema) {
|
||||
// if we get here then the name was missing, since syntax or missing actual CF
|
||||
// checks throw. Slow path, but just call get_table_name to generate exception.
|
||||
get_table_name(request);
|
||||
get_table_name(request);
|
||||
}
|
||||
return schema;
|
||||
}
|
||||
@@ -220,7 +220,7 @@ static std::tuple<bool, std::string_view, std::string_view> try_get_internal_tab
|
||||
std::string_view ks_name = table_name.substr(0, delim);
|
||||
table_name.remove_prefix(ks_name.size() + 1);
|
||||
// Only internal keyspaces can be accessed to avoid leakage
|
||||
if (!is_internal_keyspace(ks_name)) {
|
||||
if (!is_internal_keyspace(sstring(ks_name))) {
|
||||
return {false, "", ""};
|
||||
}
|
||||
return {true, ks_name, table_name};
|
||||
@@ -404,7 +404,6 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
|
||||
// returned.
|
||||
rjson::set(table_description, "TableStatus", "ACTIVE");
|
||||
rjson::set(table_description, "TableArn", generate_arn_for_table(*schema));
|
||||
rjson::set(table_description, "TableId", rjson::from_string(schema->id().to_sstring()));
|
||||
// FIXME: Instead of hardcoding, we should take into account which mode was chosen
|
||||
// when the table was created. But, Spark jobs expect something to be returned
|
||||
// and PAY_PER_REQUEST seems closer to reality than PROVISIONED.
|
||||
@@ -476,8 +475,8 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
|
||||
return make_ready_future<request_return_type>(api_error::resource_not_found(
|
||||
format("Requested resource not found: Table: {} not found", table_name)));
|
||||
}
|
||||
return _mm.announce_column_family_drop(keyspace_name, table_name, service::migration_manager::drop_views::yes).then([this, keyspace_name] {
|
||||
return _mm.announce_keyspace_drop(keyspace_name);
|
||||
return _mm.announce_column_family_drop(keyspace_name, table_name, false, service::migration_manager::drop_views::yes).then([this, keyspace_name] {
|
||||
return _mm.announce_keyspace_drop(keyspace_name, false);
|
||||
}).then([table_name = std::move(table_name)] {
|
||||
// FIXME: need more attributes?
|
||||
rjson::value table_description = rjson::empty_object();
|
||||
@@ -704,48 +703,52 @@ static void update_tags_map(const rjson::value& tags, std::map<sstring, sstring>
|
||||
static future<> update_tags(service::migration_manager& mm, schema_ptr schema, std::map<sstring, sstring>&& tags_map) {
|
||||
schema_builder builder(schema);
|
||||
builder.add_extension(tags_extension::NAME, ::make_shared<tags_extension>(std::move(tags_map)));
|
||||
return mm.announce_column_family_update(builder.build(), false, std::vector<view_ptr>());
|
||||
return mm.announce_column_family_update(builder.build(), false, std::vector<view_ptr>(), false);
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::tag_resource(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.tag_resource++;
|
||||
|
||||
const rjson::value* arn = rjson::find(request, "ResourceArn");
|
||||
if (!arn || !arn->IsString()) {
|
||||
co_return api_error::access_denied("Incorrect resource identifier");
|
||||
}
|
||||
schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));
|
||||
std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
|
||||
const rjson::value* tags = rjson::find(request, "Tags");
|
||||
if (!tags || !tags->IsArray()) {
|
||||
co_return api_error::validation("Cannot parse tags");
|
||||
}
|
||||
if (tags->Size() < 1) {
|
||||
co_return api_error::validation("The number of tags must be at least 1") ;
|
||||
}
|
||||
update_tags_map(*tags, tags_map, update_tags_action::add_tags);
|
||||
co_await update_tags(_mm, schema, std::move(tags_map));
|
||||
co_return json_string("");
|
||||
return seastar::async([this, &client_state, request = std::move(request)] () mutable -> request_return_type {
|
||||
const rjson::value* arn = rjson::find(request, "ResourceArn");
|
||||
if (!arn || !arn->IsString()) {
|
||||
return api_error::access_denied("Incorrect resource identifier");
|
||||
}
|
||||
schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));
|
||||
std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
|
||||
const rjson::value* tags = rjson::find(request, "Tags");
|
||||
if (!tags || !tags->IsArray()) {
|
||||
return api_error::validation("Cannot parse tags");
|
||||
}
|
||||
if (tags->Size() < 1) {
|
||||
return api_error::validation("The number of tags must be at least 1") ;
|
||||
}
|
||||
update_tags_map(*tags, tags_map, update_tags_action::add_tags);
|
||||
update_tags(_mm, schema, std::move(tags_map)).get();
|
||||
return json_string("");
|
||||
});
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::untag_resource(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.untag_resource++;
|
||||
|
||||
const rjson::value* arn = rjson::find(request, "ResourceArn");
|
||||
if (!arn || !arn->IsString()) {
|
||||
co_return api_error::access_denied("Incorrect resource identifier");
|
||||
}
|
||||
const rjson::value* tags = rjson::find(request, "TagKeys");
|
||||
if (!tags || !tags->IsArray()) {
|
||||
co_return api_error::validation(format("Cannot parse tag keys"));
|
||||
}
|
||||
return seastar::async([this, &client_state, request = std::move(request)] () -> request_return_type {
|
||||
const rjson::value* arn = rjson::find(request, "ResourceArn");
|
||||
if (!arn || !arn->IsString()) {
|
||||
return api_error::access_denied("Incorrect resource identifier");
|
||||
}
|
||||
const rjson::value* tags = rjson::find(request, "TagKeys");
|
||||
if (!tags || !tags->IsArray()) {
|
||||
return api_error::validation(format("Cannot parse tag keys"));
|
||||
}
|
||||
|
||||
schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));
|
||||
schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));
|
||||
|
||||
std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
|
||||
update_tags_map(*tags, tags_map, update_tags_action::delete_tags);
|
||||
co_await update_tags(_mm, schema, std::move(tags_map));
|
||||
co_return json_string("");
|
||||
std::map<sstring, sstring> tags_map = get_tags_of_table(schema);
|
||||
update_tags_map(*tags, tags_map, update_tags_action::delete_tags);
|
||||
update_tags(_mm, schema, std::move(tags_map)).get();
|
||||
return json_string("");
|
||||
});
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::list_tags_of_resource(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
@@ -981,7 +984,7 @@ future<executor::request_return_type> executor::create_table(client_state& clien
|
||||
return create_keyspace(keyspace_name).handle_exception_type([] (exceptions::already_exists_exception&) {
|
||||
// Ignore the fact that the keyspace may already exist. See discussion in #6340
|
||||
}).then([this, table_name, request = std::move(request), schema, view_builders = std::move(view_builders), tags_map = std::move(tags_map)] () mutable {
|
||||
return futurize_invoke([&] { return _mm.announce_new_column_family(schema); }).then([this, table_info = std::move(request), schema, view_builders = std::move(view_builders), tags_map = std::move(tags_map)] () mutable {
|
||||
return futurize_invoke([&] { return _mm.announce_new_column_family(schema, false); }).then([this, table_info = std::move(request), schema, view_builders = std::move(view_builders), tags_map = std::move(tags_map)] () mutable {
|
||||
return parallel_for_each(std::move(view_builders), [this, schema] (schema_builder builder) {
|
||||
return _mm.announce_new_view(view_ptr(builder.build()));
|
||||
}).then([this, table_info = std::move(table_info), schema, tags_map = std::move(tags_map)] () mutable {
|
||||
@@ -1237,16 +1240,10 @@ mutation put_or_delete_item::build(schema_ptr schema, api::timestamp_type ts) co
|
||||
return m;
|
||||
}
|
||||
|
||||
// The DynamoDB API doesn't let the client control the server's timeout, so
|
||||
// we have a global default_timeout() for Alternator requests. The value of
|
||||
// default_timeout is overwritten by main.cc based on the
|
||||
// "alternator_timeout_in_ms" configuration parameter.
|
||||
db::timeout_clock::duration executor::s_default_timeout = 10s;
|
||||
void executor::set_default_timeout(db::timeout_clock::duration timeout) {
|
||||
s_default_timeout = timeout;
|
||||
}
|
||||
// The DynamoDB API doesn't let the client control the server's timeout.
|
||||
// Let's pick something reasonable:
|
||||
db::timeout_clock::time_point executor::default_timeout() {
|
||||
return db::timeout_clock::now() + s_default_timeout;
|
||||
return db::timeout_clock::now() + 10s;
|
||||
}
|
||||
|
||||
static future<std::unique_ptr<rjson::value>> get_previous_item(
|
||||
@@ -1882,182 +1879,18 @@ static std::string get_item_type_string(const rjson::value& v) {
|
||||
return it->name.GetString();
|
||||
}
|
||||
|
||||
// attrs_to_get saves for each top-level attribute an attrs_to_get_node,
|
||||
// a hierarchy of subparts that need to be kept. The following function
|
||||
// takes a given JSON value and drops its parts which weren't asked to be
|
||||
// kept. It modifies the given JSON value, or returns false to signify that
|
||||
// the entire object should be dropped.
|
||||
// Note that the JSON value is assumed to be encoded using the DynamoDB
|
||||
// conventions - i.e., it is really a map whose key has a type string,
|
||||
// and the value is the real object.
|
||||
template<typename T>
|
||||
static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>& h) {
|
||||
if (!val.IsObject() || val.MemberCount() != 1) {
|
||||
// This shouldn't happen. We shouldn't have stored malformed objects.
|
||||
// But today Alternator does not validate the structure of nested
|
||||
// documents before storing them, so this can happen on read.
|
||||
throw api_error::internal(format("Malformed value object read: {}", val));
|
||||
}
|
||||
const char* type = val.MemberBegin()->name.GetString();
|
||||
rjson::value& v = val.MemberBegin()->value;
|
||||
if (h.has_members()) {
|
||||
const auto& members = h.get_members();
|
||||
if (type[0] != 'M' || !v.IsObject()) {
|
||||
// If v is not an object (dictionary, map), none of the members
|
||||
// can match.
|
||||
return false;
|
||||
}
|
||||
rjson::value newv = rjson::empty_object();
|
||||
for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
|
||||
std::string attr = it->name.GetString();
|
||||
auto x = members.find(attr);
|
||||
if (x != members.end()) {
|
||||
if (x->second) {
|
||||
// Only a part of this attribute is to be filtered, do it.
|
||||
if (hierarchy_filter(it->value, *x->second)) {
|
||||
rjson::set_with_string_name(newv, attr, std::move(it->value));
|
||||
}
|
||||
} else {
|
||||
// The entire attribute is to be kept
|
||||
rjson::set_with_string_name(newv, attr, std::move(it->value));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (newv.MemberCount() == 0) {
|
||||
return false;
|
||||
}
|
||||
v = newv;
|
||||
} else if (h.has_indexes()) {
|
||||
const auto& indexes = h.get_indexes();
|
||||
if (type[0] != 'L' || !v.IsArray()) {
|
||||
return false;
|
||||
}
|
||||
rjson::value newv = rjson::empty_array();
|
||||
const auto& a = v.GetArray();
|
||||
for (unsigned i = 0; i < v.Size(); i++) {
|
||||
auto x = indexes.find(i);
|
||||
if (x != indexes.end()) {
|
||||
if (x->second) {
|
||||
if (hierarchy_filter(a[i], *x->second)) {
|
||||
rjson::push_back(newv, std::move(a[i]));
|
||||
}
|
||||
} else {
|
||||
// The entire attribute is to be kept
|
||||
rjson::push_back(newv, std::move(a[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (newv.Size() == 0) {
|
||||
return false;
|
||||
}
|
||||
v = newv;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Add a path to an attribute_path_map. Throws a validation error if the path
|
||||
// "overlaps" with one already in the filter (one is a sub-path of the other)
|
||||
// or "conflicts" with it (both a member and index is requested).
|
||||
template<typename T>
|
||||
void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const parsed::path& p, T value = {}) {
|
||||
using node = attribute_path_map_node<T>;
|
||||
// The first step is to look for the top-level attribute (p.root()):
|
||||
auto it = map.find(p.root());
|
||||
if (it == map.end()) {
|
||||
if (p.has_operators()) {
|
||||
it = map.emplace(p.root(), node {std::nullopt}).first;
|
||||
} else {
|
||||
(void) map.emplace(p.root(), node {std::move(value)}).first;
|
||||
// Value inserted for top-level node. We're done.
|
||||
return;
|
||||
}
|
||||
} else if(!p.has_operators()) {
|
||||
// If p is top-level and we already have it or a part of it
|
||||
// in map, it's a forbidden overlapping path.
|
||||
throw api_error::validation(format(
|
||||
"Invalid {}: two document paths overlap at {}", source, p.root()));
|
||||
} else if (it->second.has_value()) {
|
||||
// If we're here, it != map.end() && p.has_operators && it->second.has_value().
|
||||
// This means the top-level attribute already has a value, and we're
|
||||
// trying to add a non-top-level value. It's an overlap.
|
||||
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p.root()));
|
||||
}
|
||||
node* h = &it->second;
|
||||
// The second step is to walk h from the top-level node to the inner node
|
||||
// where we're supposed to insert the value:
|
||||
for (const auto& op : p.operators()) {
|
||||
std::visit(overloaded_functor {
|
||||
[&] (const std::string& member) {
|
||||
if (h->is_empty()) {
|
||||
*h = node {typename node::members_t()};
|
||||
} else if (h->has_indexes()) {
|
||||
throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
|
||||
} else if (h->has_value()) {
|
||||
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
|
||||
}
|
||||
typename node::members_t& members = h->get_members();
|
||||
auto it = members.find(member);
|
||||
if (it == members.end()) {
|
||||
it = members.insert({member, std::make_unique<node>()}).first;
|
||||
}
|
||||
h = it->second.get();
|
||||
},
|
||||
[&] (unsigned index) {
|
||||
if (h->is_empty()) {
|
||||
*h = node {typename node::indexes_t()};
|
||||
} else if (h->has_members()) {
|
||||
throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
|
||||
} else if (h->has_value()) {
|
||||
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
|
||||
}
|
||||
typename node::indexes_t& indexes = h->get_indexes();
|
||||
auto it = indexes.find(index);
|
||||
if (it == indexes.end()) {
|
||||
it = indexes.insert({index, std::make_unique<node>()}).first;
|
||||
}
|
||||
h = it->second.get();
|
||||
}
|
||||
}, op);
|
||||
}
|
||||
// Finally, insert the value in the node h.
|
||||
if (h->is_empty()) {
|
||||
*h = node {std::move(value)};
|
||||
} else {
|
||||
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
|
||||
}
|
||||
}
|
||||
|
||||
// A very simplified version of the above function for the special case of
|
||||
// adding only top-level attribute. It's not only simpler, we also use a
|
||||
// different error message, referring to a "duplicate attribute" instead of
|
||||
// "overlapping paths". DynamoDB also has this distinction (errors in
|
||||
// AttributesToGet refer to duplicates, not overlaps, but errors in
|
||||
// ProjectionExpression refer to overlap - even if it's an exact duplicate).
|
||||
template<typename T>
|
||||
void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const std::string& attr, T value = {}) {
|
||||
using node = attribute_path_map_node<T>;
|
||||
auto it = map.find(attr);
|
||||
if (it == map.end()) {
|
||||
map.emplace(attr, node {std::move(value)});
|
||||
} else {
|
||||
throw api_error::validation(format(
|
||||
"Invalid {}: Duplicate attribute: {}", source, attr));
|
||||
}
|
||||
}
|
||||
|
||||
// calculate_attrs_to_get() takes either AttributesToGet or
|
||||
// ProjectionExpression parameters (having both is *not* allowed),
|
||||
// and returns the list of cells we need to read, or an empty set when
|
||||
// *all* attributes are to be returned.
|
||||
// However, in our current implementation, only top-level attributes are
|
||||
// stored as separate cells - a nested document is stored serialized together
|
||||
// (as JSON) in the same cell. So this function returns a map - each key is the
|
||||
// top-level attribute we will need to read, and the value for each
|
||||
// top-level attribute is the partial hierarchy (struct hierarchy_filter)
|
||||
// that we will need to extract from that serialized JSON.
|
||||
// For example, if ProjectionExpression lists a.b and a.c[2], we
|
||||
// return one top-level attribute name, "a", with the value "{b, c[2]}".
|
||||
static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unordered_set<std::string>& used_attribute_names) {
|
||||
// In our current implementation, only top-level attributes are stored
|
||||
// as cells, and nested documents are stored serialized as JSON.
|
||||
// So this function currently returns only the top-level attributes
|
||||
// but we also need to add, after the query, filtering to keep only
|
||||
// the parts of the JSON attributes that were chosen in the paths'
|
||||
// operators. Because we don't have such filtering yet (FIXME), we fail here
|
||||
// if the requested paths are anything but top-level attributes.
|
||||
std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req, std::unordered_set<std::string>& used_attribute_names) {
|
||||
const bool has_attributes_to_get = req.HasMember("AttributesToGet");
|
||||
const bool has_projection_expression = req.HasMember("ProjectionExpression");
|
||||
if (has_attributes_to_get && has_projection_expression) {
|
||||
@@ -2066,9 +1899,9 @@ static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unorder
|
||||
}
|
||||
if (has_attributes_to_get) {
|
||||
const rjson::value& attributes_to_get = req["AttributesToGet"];
|
||||
attrs_to_get ret;
|
||||
std::unordered_set<std::string> ret;
|
||||
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
|
||||
attribute_path_map_add("AttributesToGet", ret, it->GetString());
|
||||
ret.insert(it->GetString());
|
||||
}
|
||||
return ret;
|
||||
} else if (has_projection_expression) {
|
||||
@@ -2081,13 +1914,24 @@ static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unorder
|
||||
throw api_error::validation(e.what());
|
||||
}
|
||||
resolve_projection_expression(paths_to_get, expression_attribute_names, used_attribute_names);
|
||||
attrs_to_get ret;
|
||||
for (const parsed::path& p : paths_to_get) {
|
||||
attribute_path_map_add("ProjectionExpression", ret, p);
|
||||
}
|
||||
std::unordered_set<std::string> seen_column_names;
|
||||
auto ret = boost::copy_range<std::unordered_set<std::string>>(paths_to_get |
|
||||
boost::adaptors::transformed([&] (const parsed::path& p) {
|
||||
if (p.has_operators()) {
|
||||
// FIXME: this check will need to change when we support non-toplevel attributes
|
||||
throw api_error::validation("Non-toplevel attributes in ProjectionExpression not yet implemented");
|
||||
}
|
||||
if (!seen_column_names.insert(p.root()).second) {
|
||||
// FIXME: this check will need to change when we support non-toplevel attributes
|
||||
throw api_error::validation(
|
||||
format("Invalid ProjectionExpression: two document paths overlap with each other: {} and {}.",
|
||||
p.root(), p.root()));
|
||||
}
|
||||
return p.root();
|
||||
}));
|
||||
return ret;
|
||||
}
|
||||
// An empty map asks to read everything
|
||||
// An empty set asks to read everything
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -2108,7 +1952,7 @@ static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unorder
|
||||
*/
|
||||
void executor::describe_single_item(const cql3::selection::selection& selection,
|
||||
const std::vector<bytes_opt>& result_row,
|
||||
const attrs_to_get& attrs_to_get,
|
||||
const std::unordered_set<std::string>& attrs_to_get,
|
||||
rjson::value& item,
|
||||
bool include_all_embedded_attributes)
|
||||
{
|
||||
@@ -2129,16 +1973,7 @@ void executor::describe_single_item(const cql3::selection::selection& selection,
|
||||
std::string attr_name = value_cast<sstring>(entry.first);
|
||||
if (include_all_embedded_attributes || attrs_to_get.empty() || attrs_to_get.contains(attr_name)) {
|
||||
bytes value = value_cast<bytes>(entry.second);
|
||||
rjson::value v = deserialize_item(value);
|
||||
auto it = attrs_to_get.find(attr_name);
|
||||
if (it != attrs_to_get.end()) {
|
||||
// attrs_to_get may have asked for only part of this attribute:
|
||||
if (hierarchy_filter(v, it->second)) {
|
||||
rjson::set_with_string_name(item, attr_name, std::move(v));
|
||||
}
|
||||
} else {
|
||||
rjson::set_with_string_name(item, attr_name, std::move(v));
|
||||
}
|
||||
rjson::set_with_string_name(item, attr_name, deserialize_item(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2150,7 +1985,7 @@ std::optional<rjson::value> executor::describe_single_item(schema_ptr schema,
|
||||
const query::partition_slice& slice,
|
||||
const cql3::selection::selection& selection,
|
||||
const query::result& query_result,
|
||||
const attrs_to_get& attrs_to_get) {
|
||||
const std::unordered_set<std::string>& attrs_to_get) {
|
||||
rjson::value item = rjson::empty_object();
|
||||
|
||||
cql3::selection::result_set_builder builder(selection, gc_clock::now(), cql_serialization_format::latest());
|
||||
@@ -2186,16 +2021,8 @@ static bool check_needs_read_before_write(const parsed::value& v) {
|
||||
}, v._value);
|
||||
}
|
||||
|
||||
static bool check_needs_read_before_write(const attribute_path_map<parsed::update_expression::action>& update_expression) {
|
||||
return boost::algorithm::any_of(update_expression, [](const auto& p) {
|
||||
if (!p.second.has_value()) {
|
||||
// If the action is not on the top-level attribute, we need to
|
||||
// read the old item: we change only a part of the top-level
|
||||
// attribute, and write the full top-level attribute back.
|
||||
return true;
|
||||
}
|
||||
// Otherwise, the action p.second.get_value() is just on top-level
|
||||
// attribute. Check if it needs read-before-write:
|
||||
static bool check_needs_read_before_write(const parsed::update_expression& update_expression) {
|
||||
return boost::algorithm::any_of(update_expression.actions(), [](const parsed::update_expression::action& action) {
|
||||
return std::visit(overloaded_functor {
|
||||
[&] (const parsed::update_expression::action::set& a) -> bool {
|
||||
return check_needs_read_before_write(a._rhs._v1) || (a._rhs._op != 'v' && check_needs_read_before_write(a._rhs._v2));
|
||||
@@ -2209,7 +2036,7 @@ static bool check_needs_read_before_write(const attribute_path_map<parsed::updat
|
||||
[&] (const parsed::update_expression::action::del& a) -> bool {
|
||||
return true;
|
||||
}
|
||||
}, p.second.get_value()._action);
|
||||
}, action._action);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2218,11 +2045,7 @@ public:
|
||||
// Some information parsed during the constructor to check for input
|
||||
// errors, and cached to be used again during apply().
|
||||
rjson::value* _attribute_updates;
|
||||
// Instead of keeping a parsed::update_expression with an unsorted list
|
||||
// list of actions, we keep them in an attribute_path_map which groups
|
||||
// them by top-level attribute, and detects forbidden overlaps/conflicts.
|
||||
attribute_path_map<parsed::update_expression::action> _update_expression;
|
||||
|
||||
parsed::update_expression _update_expression;
|
||||
parsed::condition_expression _condition_expression;
|
||||
|
||||
update_item_operation(service::storage_proxy& proxy, rjson::value&& request);
|
||||
@@ -2253,22 +2076,16 @@ update_item_operation::update_item_operation(service::storage_proxy& proxy, rjso
|
||||
throw api_error::validation("UpdateExpression must be a string");
|
||||
}
|
||||
try {
|
||||
parsed::update_expression expr = parse_update_expression(update_expression->GetString());
|
||||
resolve_update_expression(expr,
|
||||
_update_expression = parse_update_expression(update_expression->GetString());
|
||||
resolve_update_expression(_update_expression,
|
||||
expression_attribute_names, expression_attribute_values,
|
||||
used_attribute_names, used_attribute_values);
|
||||
if (expr.empty()) {
|
||||
throw api_error::validation("Empty expression in UpdateExpression is not allowed");
|
||||
}
|
||||
for (auto& action : expr.actions()) {
|
||||
// Unfortunately we need to copy the action's path, because
|
||||
// we std::move the action object.
|
||||
auto p = action._path;
|
||||
attribute_path_map_add("UpdateExpression", _update_expression, p, std::move(action));
|
||||
}
|
||||
} catch(expressions_syntax_error& e) {
|
||||
throw api_error::validation(e.what());
|
||||
}
|
||||
if (_update_expression.empty()) {
|
||||
throw api_error::validation("Empty expression in UpdateExpression is not allowed");
|
||||
}
|
||||
}
|
||||
_attribute_updates = rjson::find(_request, "AttributeUpdates");
|
||||
if (_attribute_updates) {
|
||||
@@ -2310,187 +2127,6 @@ update_item_operation::needs_read_before_write() const {
|
||||
(_returnvalues != returnvalues::NONE && _returnvalues != returnvalues::UPDATED_NEW);
|
||||
}
|
||||
|
||||
// action_result() returns the result of applying an UpdateItem action -
|
||||
// this result is either a JSON object or an unset optional which indicates
|
||||
// the action was a deletion. The caller (update_item_operation::apply()
|
||||
// below) will either write this JSON as the content of a column, or
|
||||
// use it as a piece in a bigger top-level attribute.
|
||||
static std::optional<rjson::value> action_result(
|
||||
const parsed::update_expression::action& action,
|
||||
const rjson::value* previous_item) {
|
||||
return std::visit(overloaded_functor {
|
||||
[&] (const parsed::update_expression::action::set& a) -> std::optional<rjson::value> {
|
||||
return calculate_value(a._rhs, previous_item);
|
||||
},
|
||||
[&] (const parsed::update_expression::action::remove& a) -> std::optional<rjson::value> {
|
||||
return std::nullopt;
|
||||
},
|
||||
[&] (const parsed::update_expression::action::add& a) -> std::optional<rjson::value> {
|
||||
parsed::value base;
|
||||
parsed::value addition;
|
||||
base.set_path(action._path);
|
||||
addition.set_constant(a._valref);
|
||||
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item);
|
||||
rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item);
|
||||
rjson::value result;
|
||||
// An ADD can be used to create a new attribute (when
|
||||
// v1.IsNull()) or to add to a pre-existing attribute:
|
||||
if (v1.IsNull()) {
|
||||
std::string v2_type = get_item_type_string(v2);
|
||||
if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
|
||||
result = v2;
|
||||
} else {
|
||||
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
|
||||
}
|
||||
} else {
|
||||
std::string v1_type = get_item_type_string(v1);
|
||||
if (v1_type == "N") {
|
||||
if (get_item_type_string(v2) != "N") {
|
||||
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
|
||||
}
|
||||
result = number_add(v1, v2);
|
||||
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
|
||||
if (get_item_type_string(v2) != v1_type) {
|
||||
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
|
||||
}
|
||||
result = set_sum(v1, v2);
|
||||
} else {
|
||||
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
},
|
||||
[&] (const parsed::update_expression::action::del& a) -> std::optional<rjson::value> {
|
||||
parsed::value base;
|
||||
parsed::value subset;
|
||||
base.set_path(action._path);
|
||||
subset.set_constant(a._valref);
|
||||
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item);
|
||||
rjson::value v2 = calculate_value(subset, calculate_value_caller::UpdateExpression, previous_item);
|
||||
if (!v1.IsNull()) {
|
||||
return set_diff(v1, v2);
|
||||
}
|
||||
// When we return nullopt here, we ask to *delete* this attribute,
|
||||
// which is unnecessary because we know the attribute does not
|
||||
// exist anyway. This is a waste, but a small one. Note that also
|
||||
// for the "remove" action above we don't bother to check if the
|
||||
// previous_item add anything to remove.
|
||||
return std::nullopt;
|
||||
}
|
||||
}, action._action);
|
||||
}
|
||||
|
||||
// Print an attribute_path_map_node<action> as the list of paths it contains:
|
||||
static std::ostream& operator<<(std::ostream& out, const attribute_path_map_node<parsed::update_expression::action>& h) {
|
||||
if (h.has_value()) {
|
||||
out << " " << h.get_value()._path;
|
||||
} else if (h.has_members()) {
|
||||
for (auto& member : h.get_members()) {
|
||||
out << *member.second;
|
||||
}
|
||||
} else if (h.has_indexes()) {
|
||||
for (auto& index : h.get_indexes()) {
|
||||
out << *index.second;
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// Apply the hierarchy of actions in an attribute_path_map_node<action> to a
|
||||
// JSON object which uses DynamoDB's serialization conventions. The complete,
|
||||
// unmodified, previous_item is also necessary for the right-hand sides of the
|
||||
// actions. Modifies obj in-place or returns false if it is to be removed.
|
||||
static bool hierarchy_actions(
|
||||
rjson::value& obj,
|
||||
const attribute_path_map_node<parsed::update_expression::action>& h,
|
||||
const rjson::value* previous_item)
|
||||
{
|
||||
if (!obj.IsObject() || obj.MemberCount() != 1) {
|
||||
// This shouldn't happen. We shouldn't have stored malformed objects.
|
||||
// But today Alternator does not validate the structure of nested
|
||||
// documents before storing them, so this can happen on read.
|
||||
throw api_error::validation(format("Malformed value object read: {}", obj));
|
||||
}
|
||||
const char* type = obj.MemberBegin()->name.GetString();
|
||||
rjson::value& v = obj.MemberBegin()->value;
|
||||
if (h.has_value()) {
|
||||
// Action replacing everything in this position in the hierarchy
|
||||
std::optional<rjson::value> newv = action_result(h.get_value(), previous_item);
|
||||
if (newv) {
|
||||
obj = std::move(*newv);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else if (h.has_members()) {
|
||||
if (type[0] != 'M' || !v.IsObject()) {
|
||||
// A .something on a non-map doesn't work.
|
||||
throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
|
||||
}
|
||||
for (const auto& member : h.get_members()) {
|
||||
std::string attr = member.first;
|
||||
const attribute_path_map_node<parsed::update_expression::action>& subh = *member.second;
|
||||
rjson::value *subobj = rjson::find(v, attr);
|
||||
if (subobj) {
|
||||
if (!hierarchy_actions(*subobj, subh, previous_item)) {
|
||||
rjson::remove_member(v, attr);
|
||||
}
|
||||
} else {
|
||||
// When a.b does not exist, setting a.b itself (i.e.
|
||||
// subh.has_value()) is fine, but setting a.b.c is not.
|
||||
if (subh.has_value()) {
|
||||
std::optional<rjson::value> newv = action_result(subh.get_value(), previous_item);
|
||||
if (newv) {
|
||||
rjson::set_with_string_name(v, attr, std::move(*newv));
|
||||
} else {
|
||||
// Removing a.b when a is a map but a.b doesn't exist
|
||||
// is silently ignored. It's not considered an error.
|
||||
}
|
||||
} else {
|
||||
throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (h.has_indexes()) {
|
||||
if (type[0] != 'L' || !v.IsArray()) {
|
||||
// A [i] on a non-list doesn't work.
|
||||
throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
|
||||
}
|
||||
unsigned nremoved = 0;
|
||||
for (const auto& index : h.get_indexes()) {
|
||||
unsigned i = index.first - nremoved;
|
||||
const attribute_path_map_node<parsed::update_expression::action>& subh = *index.second;
|
||||
if (i < v.Size()) {
|
||||
if (!hierarchy_actions(v[i], subh, previous_item)) {
|
||||
v.Erase(v.Begin() + i);
|
||||
// If we have the actions "REMOVE a[1] SET a[3] = :val",
|
||||
// the index 3 refers to the original indexes, before any
|
||||
// items were removed. So we offset the next indexes
|
||||
// (which are guaranteed to be higher than i - indexes is
|
||||
// a sorted map) by an increased "nremoved".
|
||||
nremoved++;
|
||||
}
|
||||
} else {
|
||||
// If a[7] does not exist, setting a[7] itself (i.e.
|
||||
// subh.has_value()) is fine - and appends an item, though
|
||||
// not necessarily with index 7. But setting a[7].b will
|
||||
// not work.
|
||||
if (subh.has_value()) {
|
||||
std::optional<rjson::value> newv = action_result(subh.get_value(), previous_item);
|
||||
if (newv) {
|
||||
rjson::push_back(v, std::move(*newv));
|
||||
} else {
|
||||
// Removing a[7] when the list has fewer elements is
|
||||
// silently ignored. It's not considered an error.
|
||||
}
|
||||
} else {
|
||||
throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<mutation>
|
||||
update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const {
|
||||
if (!verify_expected(_request, previous_item.get()) ||
|
||||
@@ -2505,37 +2141,17 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
|
||||
auto& row = m.partition().clustered_row(*_schema, _ck);
|
||||
attribute_collector attrs_collector;
|
||||
bool any_updates = false;
|
||||
auto do_update = [&] (bytes&& column_name, const rjson::value& json_value,
|
||||
const attribute_path_map_node<parsed::update_expression::action>* h = nullptr) {
|
||||
auto do_update = [&] (bytes&& column_name, const rjson::value& json_value) {
|
||||
any_updates = true;
|
||||
if (_returnvalues == returnvalues::ALL_NEW) {
|
||||
rjson::replace_with_string_name(_return_attributes,
|
||||
to_sstring_view(column_name), rjson::copy(json_value));
|
||||
} else if (_returnvalues == returnvalues::UPDATED_NEW) {
|
||||
rjson::value&& v = rjson::copy(json_value);
|
||||
if (h) {
|
||||
// If the operation was only on specific attribute paths,
|
||||
// leave only them in _return_attributes.
|
||||
if (hierarchy_filter(v, *h)) {
|
||||
rjson::set_with_string_name(_return_attributes,
|
||||
to_sstring_view(column_name), std::move(v));
|
||||
}
|
||||
} else {
|
||||
rjson::set_with_string_name(_return_attributes,
|
||||
to_sstring_view(column_name), std::move(v));
|
||||
}
|
||||
if (_returnvalues == returnvalues::ALL_NEW ||
|
||||
_returnvalues == returnvalues::UPDATED_NEW) {
|
||||
rjson::set_with_string_name(_return_attributes,
|
||||
to_sstring_view(column_name), rjson::copy(json_value));
|
||||
} else if (_returnvalues == returnvalues::UPDATED_OLD && previous_item) {
|
||||
std::string_view cn = to_sstring_view(column_name);
|
||||
const rjson::value* col = rjson::find(*previous_item, cn);
|
||||
if (col) {
|
||||
rjson::value&& v = rjson::copy(*col);
|
||||
if (h) {
|
||||
if (hierarchy_filter(v, *h)) {
|
||||
rjson::set_with_string_name(_return_attributes, cn, std::move(v));
|
||||
}
|
||||
} else {
|
||||
rjson::set_with_string_name(_return_attributes, cn, std::move(v));
|
||||
}
|
||||
rjson::set_with_string_name(_return_attributes, cn, rjson::copy(*col));
|
||||
}
|
||||
}
|
||||
const column_definition* cdef = _schema->get_column_definition(column_name);
|
||||
@@ -2577,7 +2193,7 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
|
||||
// can just move previous_item later, when we don't need it any more.
|
||||
if (_returnvalues == returnvalues::ALL_NEW) {
|
||||
if (previous_item) {
|
||||
_return_attributes = rjson::copy(*previous_item);
|
||||
_return_attributes = std::move(*previous_item);
|
||||
} else {
|
||||
// If there is no previous item, usually a new item is created
|
||||
// and contains the given key. This may be cancelled at the end
|
||||
@@ -2590,44 +2206,88 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
|
||||
}
|
||||
|
||||
if (!_update_expression.empty()) {
|
||||
for (auto& actions : _update_expression) {
|
||||
// The actions of _update_expression are grouped by top-level
|
||||
// attributes. Here, all actions in actions.second share the same
|
||||
// top-level attribute actions.first.
|
||||
std::string column_name = actions.first;
|
||||
std::unordered_set<std::string> seen_column_names;
|
||||
for (auto& action : _update_expression.actions()) {
|
||||
if (action._path.has_operators()) {
|
||||
// FIXME: implement this case
|
||||
throw api_error::validation("UpdateItem support for nested updates not yet implemented");
|
||||
}
|
||||
std::string column_name = action._path.root();
|
||||
const column_definition* cdef = _schema->get_column_definition(to_bytes(column_name));
|
||||
if (cdef && cdef->is_primary_key()) {
|
||||
throw api_error::validation(format("UpdateItem cannot update key column {}", column_name));
|
||||
throw api_error::validation(
|
||||
format("UpdateItem cannot update key column {}", column_name));
|
||||
}
|
||||
if (actions.second.has_value()) {
|
||||
// An action on a top-level attribute column_name. The single
|
||||
// action is actions.second.get_value(). We can simply invoke
|
||||
// the action and replace the attribute with its result:
|
||||
std::optional<rjson::value> result = action_result(actions.second.get_value(), previous_item.get());
|
||||
if (result) {
|
||||
do_update(to_bytes(column_name), *result);
|
||||
} else {
|
||||
// DynamoDB forbids multiple updates in the same expression to
|
||||
// modify overlapping document paths. Updates of one expression
|
||||
// have the same timestamp, so it's unclear which would "win".
|
||||
// FIXME: currently, without full support for document paths,
|
||||
// we only check if the paths' roots are the same.
|
||||
if (!seen_column_names.insert(column_name).second) {
|
||||
throw api_error::validation(
|
||||
format("Invalid UpdateExpression: two document paths overlap with each other: {} and {}.",
|
||||
column_name, column_name));
|
||||
}
|
||||
std::visit(overloaded_functor {
|
||||
[&] (const parsed::update_expression::action::set& a) {
|
||||
auto value = calculate_value(a._rhs, previous_item.get());
|
||||
do_update(to_bytes(column_name), value);
|
||||
},
|
||||
[&] (const parsed::update_expression::action::remove& a) {
|
||||
do_delete(to_bytes(column_name));
|
||||
},
|
||||
[&] (const parsed::update_expression::action::add& a) {
|
||||
parsed::value base;
|
||||
parsed::value addition;
|
||||
base.set_path(action._path);
|
||||
addition.set_constant(a._valref);
|
||||
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
|
||||
rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
|
||||
rjson::value result;
|
||||
// An ADD can be used to create a new attribute (when
|
||||
// v1.IsNull()) or to add to a pre-existing attribute:
|
||||
if (v1.IsNull()) {
|
||||
std::string v2_type = get_item_type_string(v2);
|
||||
if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
|
||||
result = v2;
|
||||
} else {
|
||||
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
|
||||
}
|
||||
} else {
|
||||
std::string v1_type = get_item_type_string(v1);
|
||||
if (v1_type == "N") {
|
||||
if (get_item_type_string(v2) != "N") {
|
||||
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
|
||||
}
|
||||
result = number_add(v1, v2);
|
||||
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
|
||||
if (get_item_type_string(v2) != v1_type) {
|
||||
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
|
||||
}
|
||||
result = set_sum(v1, v2);
|
||||
} else {
|
||||
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
|
||||
}
|
||||
}
|
||||
do_update(to_bytes(column_name), result);
|
||||
},
|
||||
[&] (const parsed::update_expression::action::del& a) {
|
||||
parsed::value base;
|
||||
parsed::value subset;
|
||||
base.set_path(action._path);
|
||||
subset.set_constant(a._valref);
|
||||
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
|
||||
rjson::value v2 = calculate_value(subset, calculate_value_caller::UpdateExpression, previous_item.get());
|
||||
if (!v1.IsNull()) {
|
||||
std::optional<rjson::value> result = set_diff(v1, v2);
|
||||
if (result) {
|
||||
do_update(to_bytes(column_name), *result);
|
||||
} else {
|
||||
do_delete(to_bytes(column_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// We have actions on a path or more than one path in the same
|
||||
// top-level attribute column_name - but not on the top-level
|
||||
// attribute as a whole. We already read the full top-level
|
||||
// attribute (see check_needs_read_before_write()), and now we
|
||||
// need to modify pieces of it and write back the entire
|
||||
// top-level attribute.
|
||||
if (!previous_item) {
|
||||
throw api_error::validation(format("UpdateItem cannot update nested document path on non-existent item"));
|
||||
}
|
||||
const rjson::value *toplevel = rjson::find(*previous_item, column_name);
|
||||
if (!toplevel) {
|
||||
throw api_error::validation(format("UpdateItem cannot update document path: missing attribute {}",
|
||||
column_name));
|
||||
}
|
||||
rjson::value result = rjson::copy(*toplevel);
|
||||
hierarchy_actions(result, actions.second, previous_item.get());
|
||||
do_update(to_bytes(column_name), std::move(result), &actions.second);
|
||||
}
|
||||
}, action._action);
|
||||
}
|
||||
}
|
||||
if (_returnvalues == returnvalues::ALL_OLD && previous_item) {
|
||||
@@ -2745,7 +2405,7 @@ static rjson::value describe_item(schema_ptr schema,
|
||||
const query::partition_slice& slice,
|
||||
const cql3::selection::selection& selection,
|
||||
const query::result& query_result,
|
||||
const attrs_to_get& attrs_to_get) {
|
||||
const std::unordered_set<std::string>& attrs_to_get) {
|
||||
std::optional<rjson::value> opt_item = executor::describe_single_item(std::move(schema), slice, selection, std::move(query_result), attrs_to_get);
|
||||
if (!opt_item) {
|
||||
// If there is no matching item, we're supposed to return an empty
|
||||
@@ -2817,7 +2477,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
|
||||
struct table_requests {
|
||||
schema_ptr schema;
|
||||
db::consistency_level cl;
|
||||
::shared_ptr<const attrs_to_get> attrs_to_get;
|
||||
std::unordered_set<std::string> attrs_to_get;
|
||||
struct single_request {
|
||||
partition_key pk;
|
||||
clustering_key ck;
|
||||
@@ -2832,7 +2492,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
|
||||
tracing::add_table_name(trace_state, sstring(executor::KEYSPACE_NAME_PREFIX) + rs.schema->cf_name(), rs.schema->cf_name());
|
||||
rs.cl = get_read_consistency(it->value);
|
||||
std::unordered_set<std::string> used_attribute_names;
|
||||
rs.attrs_to_get = ::make_shared<const attrs_to_get>(calculate_attrs_to_get(it->value, used_attribute_names));
|
||||
rs.attrs_to_get = calculate_attrs_to_get(it->value, used_attribute_names);
|
||||
verify_all_are_used(request, "ExpressionAttributeNames", used_attribute_names, "GetItem");
|
||||
auto& keys = (it->value)["Keys"];
|
||||
for (const rjson::value& key : keys.GetArray()) {
|
||||
@@ -2862,7 +2522,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
|
||||
future<std::tuple<std::string, std::optional<rjson::value>>> f = _proxy.query(rs.schema, std::move(command), std::move(partition_ranges), rs.cl,
|
||||
service::storage_proxy::coordinator_query_options(executor::default_timeout(), permit, client_state, trace_state)).then(
|
||||
[schema = rs.schema, partition_slice = std::move(partition_slice), selection = std::move(selection), attrs_to_get = rs.attrs_to_get] (service::storage_proxy::coordinator_query_result qr) mutable {
|
||||
std::optional<rjson::value> json = describe_single_item(schema, partition_slice, *selection, *qr.query_result, *attrs_to_get);
|
||||
std::optional<rjson::value> json = describe_single_item(schema, partition_slice, *selection, *qr.query_result, std::move(attrs_to_get));
|
||||
return make_ready_future<std::tuple<std::string, std::optional<rjson::value>>>(
|
||||
std::make_tuple(schema->cf_name(), std::move(json)));
|
||||
});
|
||||
@@ -2954,9 +2614,6 @@ filter::filter(const rjson::value& request, request_type rt,
|
||||
if (expression->GetStringLength() == 0) {
|
||||
throw api_error::validation("FilterExpression must not be empty");
|
||||
}
|
||||
if (rjson::find(request, "AttributesToGet")) {
|
||||
throw api_error::validation("Cannot use both old-style and new-style parameters in same request: FilterExpression and AttributesToGet");
|
||||
}
|
||||
try {
|
||||
// FIXME: make parse_condition_expression take string_view, get
|
||||
// rid of the silly conversion to std::string.
|
||||
@@ -2972,9 +2629,6 @@ filter::filter(const rjson::value& request, request_type rt,
|
||||
}
|
||||
}
|
||||
if (conditions) {
|
||||
if (rjson::find(request, "ProjectionExpression")) {
|
||||
throw api_error::validation(format("Cannot use both old-style and new-style parameters in same request: {} and ProjectionExpression", conditions_attribute));
|
||||
}
|
||||
bool require_all = conditional_operator != conditional_operator_type::OR;
|
||||
_imp = conditions_filter { require_all, rjson::copy(*conditions) };
|
||||
}
|
||||
@@ -3031,7 +2685,7 @@ void filter::for_filters_on(const noncopyable_function<void(std::string_view)>&
|
||||
class describe_items_visitor {
|
||||
typedef std::vector<const column_definition*> columns_t;
|
||||
const columns_t& _columns;
|
||||
const attrs_to_get& _attrs_to_get;
|
||||
const std::unordered_set<std::string>& _attrs_to_get;
|
||||
std::unordered_set<std::string> _extra_filter_attrs;
|
||||
const filter& _filter;
|
||||
typename columns_t::const_iterator _column_it;
|
||||
@@ -3040,7 +2694,7 @@ class describe_items_visitor {
|
||||
size_t _scanned_count;
|
||||
|
||||
public:
|
||||
describe_items_visitor(const columns_t& columns, const attrs_to_get& attrs_to_get, filter& filter)
|
||||
describe_items_visitor(const columns_t& columns, const std::unordered_set<std::string>& attrs_to_get, filter& filter)
|
||||
: _columns(columns)
|
||||
, _attrs_to_get(attrs_to_get)
|
||||
, _filter(filter)
|
||||
@@ -3089,12 +2743,6 @@ public:
|
||||
std::string attr_name = value_cast<sstring>(entry.first);
|
||||
if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name) || _extra_filter_attrs.contains(attr_name)) {
|
||||
bytes value = value_cast<bytes>(entry.second);
|
||||
// Even if _attrs_to_get asked to keep only a part of a
|
||||
// top-level attribute, we keep the entire attribute
|
||||
// at this stage, because the item filter might still
|
||||
// need the other parts (it was easier for us to keep
|
||||
// extra_filter_attrs at top-level granularity). We'll
|
||||
// filter the unneeded parts after item filtering.
|
||||
rjson::set_with_string_name(_item, attr_name, deserialize_item(value));
|
||||
}
|
||||
}
|
||||
@@ -3105,24 +2753,11 @@ public:
|
||||
|
||||
void end_row() {
|
||||
if (_filter.check(_item)) {
|
||||
// As noted above, we kept entire top-level attributes listed in
|
||||
// _attrs_to_get. We may need to only keep parts of them.
|
||||
for (const auto& attr: _attrs_to_get) {
|
||||
// If !attr.has_value() it means we were asked not to keep
|
||||
// attr entirely, but just parts of it.
|
||||
if (!attr.second.has_value()) {
|
||||
rjson::value* toplevel= rjson::find(_item, attr.first);
|
||||
if (toplevel && !hierarchy_filter(*toplevel, attr.second)) {
|
||||
rjson::remove_member(_item, attr.first);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Remove the extra attributes _extra_filter_attrs which we had
|
||||
// to add just for the filter, and not requested to be returned:
|
||||
for (const auto& attr : _extra_filter_attrs) {
|
||||
rjson::remove_member(_item, attr);
|
||||
}
|
||||
|
||||
rjson::push_back(_items, std::move(_item));
|
||||
}
|
||||
_item = rjson::empty_object();
|
||||
@@ -3138,7 +2773,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
static rjson::value describe_items(schema_ptr schema, const query::partition_slice& slice, const cql3::selection::selection& selection, std::unique_ptr<cql3::result_set> result_set, attrs_to_get&& attrs_to_get, filter&& filter) {
|
||||
static rjson::value describe_items(schema_ptr schema, const query::partition_slice& slice, const cql3::selection::selection& selection, std::unique_ptr<cql3::result_set> result_set, std::unordered_set<std::string>&& attrs_to_get, filter&& filter) {
|
||||
describe_items_visitor visitor(selection.get_columns(), attrs_to_get, filter);
|
||||
result_set->visit(visitor);
|
||||
auto scanned_count = visitor.get_scanned_count();
|
||||
@@ -3179,7 +2814,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
|
||||
const rjson::value* exclusive_start_key,
|
||||
dht::partition_range_vector&& partition_ranges,
|
||||
std::vector<query::clustering_range>&& ck_bounds,
|
||||
attrs_to_get&& attrs_to_get,
|
||||
std::unordered_set<std::string>&& attrs_to_get,
|
||||
uint32_t limit,
|
||||
db::consistency_level cl,
|
||||
filter&& filter,
|
||||
@@ -3214,12 +2849,12 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
|
||||
auto query_state_ptr = std::make_unique<service::query_state>(client_state, trace_state, std::move(permit));
|
||||
|
||||
command->slice.options.set<query::partition_slice::option::allow_short_read>();
|
||||
auto query_options = std::make_unique<cql3::query_options>(cl, std::vector<cql3::raw_value>{});
|
||||
auto query_options = std::make_unique<cql3::query_options>(cl, infinite_timeout_config, std::vector<cql3::raw_value>{});
|
||||
query_options = std::make_unique<cql3::query_options>(std::move(query_options), std::move(paging_state));
|
||||
auto p = service::pager::query_pagers::pager(schema, selection, *query_state_ptr, *query_options, command, std::move(partition_ranges), nullptr);
|
||||
|
||||
return p->fetch_page(limit, gc_clock::now(), executor::default_timeout()).then(
|
||||
[p = std::move(p), schema, cql_stats, partition_slice = std::move(partition_slice),
|
||||
[p, schema, cql_stats, partition_slice = std::move(partition_slice),
|
||||
selection = std::move(selection), query_state_ptr = std::move(query_state_ptr),
|
||||
attrs_to_get = std::move(attrs_to_get),
|
||||
query_options = std::move(query_options),
|
||||
@@ -3905,10 +3540,26 @@ future<> executor::create_keyspace(std::string_view keyspace_name) {
|
||||
}
|
||||
auto opts = get_network_topology_options(rf);
|
||||
auto ksm = keyspace_metadata::new_keyspace(keyspace_name_str, "org.apache.cassandra.locator.NetworkTopologyStrategy", std::move(opts), true);
|
||||
return _mm.announce_new_keyspace(ksm, api::new_timestamp());
|
||||
return _mm.announce_new_keyspace(ksm, api::new_timestamp(), false);
|
||||
});
|
||||
}
|
||||
|
||||
// Creates a new QUERY-type tracing session on the local tracing instance,
// with the full_tracing property set, and returns its trace state.
static tracing::trace_state_ptr create_tracing_session() {
    tracing::trace_state_props_set session_props;
    session_props.set<tracing::trace_state_props::full_tracing>();
    auto& local_tracing = tracing::tracing::get_local_tracing_instance();
    return local_tracing.create_session(tracing::trace_type::QUERY, session_props);
}
|
||||
|
||||
// Starts tracing for this request if the local tracing instance reports that
// the next query should be traced. In that case a new session is created,
// the query text is recorded in it, and tracing begins under the name
// "Alternator <op>" together with the client's address. Otherwise a
// default-constructed trace_state_ptr is returned.
tracing::trace_state_ptr executor::maybe_trace_query(client_state& client_state, sstring_view op, sstring_view query) {
    if (!tracing::tracing::get_local_tracing_instance().trace_next_query()) {
        return tracing::trace_state_ptr();
    }
    tracing::trace_state_ptr session = create_tracing_session();
    tracing::add_query(session, query);
    tracing::begin(session, format("Alternator {}", op), client_state.get_client_address());
    return session;
}
|
||||
|
||||
future<> executor::start() {
|
||||
// Currently, nothing to do on initialization. We delay the keyspace
|
||||
// creation (create_keyspace()) until a table is actually created.
|
||||
|
||||
@@ -53,10 +53,6 @@ namespace service {
|
||||
class storage_service;
|
||||
}
|
||||
|
||||
namespace cdc {
|
||||
class metadata;
|
||||
}
|
||||
|
||||
namespace alternator {
|
||||
|
||||
class rmw_operation;
|
||||
@@ -74,77 +70,11 @@ public:
|
||||
std::string to_json() const override;
|
||||
};
|
||||
|
||||
namespace parsed {
|
||||
class path;
|
||||
};
|
||||
|
||||
// An attribute_path_map object is used to hold data for various attributes
|
||||
// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
|
||||
// has a root attribute, and then modified by member and index operators -
|
||||
// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
|
||||
// "[2]" index, and finally ".c" member.
|
||||
// Data can be added to an attribute_path_map using the add() function, but
|
||||
// requires that attributes with data not be *overlapping* or *conflicting*:
|
||||
//
|
||||
// 1. Two attribute paths which are identical or an ancestor of one another
|
||||
// are considered *overlapping* and not allowed. If a.b.c has data,
|
||||
// we can't add more data in a.b.c or any of its descendants like a.b.c.d.
|
||||
//
|
||||
// 2. Two attribute paths which need the same parent to have both a member and
|
||||
// an index are considered *conflicting* and not allowed. E.g., if a.b has
|
||||
// data, you can't add a[1]. The meaning of adding both would be that the
|
||||
// attribute a is both a map and an array, which isn't sensible.
|
||||
//
|
||||
// These two requirements are common to the two places where Alternator uses
|
||||
// this abstraction to describe how a hierarchical item is to be transformed:
|
||||
//
|
||||
// 1. In ProjectionExpression: for filtering from a full top-level attribute
|
||||
// only the parts for which user asked in ProjectionExpression.
|
||||
//
|
||||
// 2. In UpdateExpression: for taking the previous value of a top-level
|
||||
// attribute, and modifying it based on the instructions the user
|
||||
// wrote in UpdateExpression.
|
||||
|
||||
// One node in a hierarchy of attribute paths. A node is either empty or
// holds exactly one of: a value of type T, a set of named members, or a set
// of indexed elements.
template<typename T>
class attribute_path_map_node {
    // True when the node is non-empty and currently holds alternative Alt.
    template<typename Alt>
    bool holds() const {
        return _content.has_value() && std::holds_alternative<Alt>(*_content);
    }
public:
    using data_t = T;
    // Child nodes are held through unique_ptr<> because libstdc++'s
    // unordered_map doesn't work with incomplete types.
    using members_t = std::unordered_map<std::string, std::unique_ptr<attribute_path_map_node<T>>>;
    // Indexes are kept in a sorted container because DynamoDB requires
    // handling writes beyond the end of a list in index order.
    using indexes_t = std::map<unsigned, std::unique_ptr<attribute_path_map_node<T>>>;
    // The prohibition on "overlap" and "conflict" means that at most one of
    // data, members or indexes is held at any time.
    std::optional<std::variant<data_t, members_t, indexes_t>> _content;

    bool is_empty() const { return !_content.has_value(); }
    bool has_value() const { return holds<data_t>(); }
    bool has_members() const { return holds<members_t>(); }
    bool has_indexes() const { return holds<indexes_t>(); }

    // Each get_*() accessor assumes the matching has_*() is true.
    T& get_value() { return std::get<data_t>(*_content); }
    const T& get_value() const { return std::get<data_t>(*_content); }
    members_t& get_members() { return std::get<members_t>(*_content); }
    const members_t& get_members() const { return std::get<members_t>(*_content); }
    indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
    const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
};
|
||||
|
||||
template<typename T>
|
||||
using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
|
||||
|
||||
using attrs_to_get_node = attribute_path_map_node<std::monostate>;
|
||||
using attrs_to_get = attribute_path_map<std::monostate>;
|
||||
|
||||
|
||||
class executor : public peering_sharded_service<executor> {
|
||||
service::storage_proxy& _proxy;
|
||||
service::migration_manager& _mm;
|
||||
db::system_distributed_keyspace& _sdks;
|
||||
service::storage_service& _ss;
|
||||
cdc::metadata& _cdc_metadata;
|
||||
// An smp_service_group to be used for limiting the concurrency when
|
||||
// forwarding Alternator requests between shards - if necessary for LWT.
|
||||
smp_service_group _ssg;
|
||||
@@ -157,8 +87,8 @@ public:
|
||||
static constexpr auto KEYSPACE_NAME_PREFIX = "alternator_";
|
||||
static constexpr std::string_view INTERNAL_TABLE_PREFIX = ".scylla.alternator.";
|
||||
|
||||
executor(service::storage_proxy& proxy, service::migration_manager& mm, db::system_distributed_keyspace& sdks, service::storage_service& ss, cdc::metadata& cdc_metadata, smp_service_group ssg)
|
||||
: _proxy(proxy), _mm(mm), _sdks(sdks), _ss(ss), _cdc_metadata(cdc_metadata), _ssg(ssg) {}
|
||||
executor(service::storage_proxy& proxy, service::migration_manager& mm, db::system_distributed_keyspace& sdks, service::storage_service& ss, smp_service_group ssg)
|
||||
: _proxy(proxy), _mm(mm), _sdks(sdks), _ss(ss), _ssg(ssg) {}
|
||||
|
||||
future<request_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
@@ -187,12 +117,10 @@ public:
|
||||
|
||||
future<> create_keyspace(std::string_view keyspace_name);
|
||||
|
||||
static tracing::trace_state_ptr maybe_trace_query(client_state& client_state, sstring_view op, sstring_view query);
|
||||
|
||||
static sstring table_name(const schema&);
|
||||
static db::timeout_clock::time_point default_timeout();
|
||||
static void set_default_timeout(db::timeout_clock::duration timeout);
|
||||
private:
|
||||
static db::timeout_clock::duration s_default_timeout;
|
||||
public:
|
||||
static schema_ptr find_table(service::storage_proxy&, const rjson::value& request);
|
||||
|
||||
private:
|
||||
@@ -208,14 +136,16 @@ public:
|
||||
const query::partition_slice&,
|
||||
const cql3::selection::selection&,
|
||||
const query::result&,
|
||||
const attrs_to_get&);
|
||||
const std::unordered_set<std::string>&);
|
||||
|
||||
static void describe_single_item(const cql3::selection::selection&,
|
||||
const std::vector<bytes_opt>&,
|
||||
const attrs_to_get&,
|
||||
const std::unordered_set<std::string>&,
|
||||
rjson::value&,
|
||||
bool = false);
|
||||
|
||||
|
||||
|
||||
void add_stream_options(const rjson::value& stream_spec, schema_builder&) const;
|
||||
void supplement_table_info(rjson::value& descr, const schema& schema) const;
|
||||
void supplement_table_stream_info(rjson::value& descr, const schema& schema) const;
|
||||
|
||||
@@ -130,27 +130,6 @@ void condition_expression::append(condition_expression&& a, char op) {
|
||||
}, _expression);
|
||||
}
|
||||
|
||||
void path::check_depth_limit() {
|
||||
if (1 + _operators.size() > depth_limit) {
|
||||
throw expressions_syntax_error(format("Document path exceeded {} nesting levels", depth_limit));
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const path& p) {
|
||||
os << p.root();
|
||||
for (const auto& op : p.operators()) {
|
||||
std::visit(overloaded_functor {
|
||||
[&] (const std::string& member) {
|
||||
os << '.' << member;
|
||||
},
|
||||
[&] (unsigned index) {
|
||||
os << '[' << index << ']';
|
||||
}
|
||||
}, op);
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
} // namespace parsed
|
||||
|
||||
// The following resolve_*() functions resolve references in parsed
|
||||
@@ -172,9 +151,10 @@ std::ostream& operator<<(std::ostream& os, const path& p) {
|
||||
// we need to resolve the expression just once but then use it many times
|
||||
// (once for each item to be filtered).
|
||||
|
||||
static std::optional<std::string> resolve_path_component(const std::string& column_name,
|
||||
static void resolve_path(parsed::path& p,
|
||||
const rjson::value* expression_attribute_names,
|
||||
std::unordered_set<std::string>& used_attribute_names) {
|
||||
const std::string& column_name = p.root();
|
||||
if (column_name.size() > 0 && column_name.front() == '#') {
|
||||
if (!expression_attribute_names) {
|
||||
throw api_error::validation(
|
||||
@@ -186,30 +166,7 @@ static std::optional<std::string> resolve_path_component(const std::string& colu
|
||||
format("ExpressionAttributeNames missing entry '{}' required by expression", column_name));
|
||||
}
|
||||
used_attribute_names.emplace(column_name);
|
||||
return std::string(rjson::to_string_view(*value));
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static void resolve_path(parsed::path& p,
|
||||
const rjson::value* expression_attribute_names,
|
||||
std::unordered_set<std::string>& used_attribute_names) {
|
||||
std::optional<std::string> r = resolve_path_component(p.root(), expression_attribute_names, used_attribute_names);
|
||||
if (r) {
|
||||
p.set_root(std::move(*r));
|
||||
}
|
||||
for (auto& op : p.operators()) {
|
||||
std::visit(overloaded_functor {
|
||||
[&] (std::string& s) {
|
||||
r = resolve_path_component(s, expression_attribute_names, used_attribute_names);
|
||||
if (r) {
|
||||
s = std::move(*r);
|
||||
}
|
||||
},
|
||||
[&] (unsigned index) {
|
||||
// nothing to resolve
|
||||
}
|
||||
}, op);
|
||||
p.set_root(std::string(rjson::to_string_view(*value)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -666,55 +623,6 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
|
||||
},
|
||||
};
|
||||
|
||||
// Given a parsed::path and an item read from the table, extract the value
|
||||
// of a certain attribute path, such as "a" or "a.b.c[3]". Returns a null
|
||||
// value if the item or the requested attribute does not exist.
|
||||
// Note that the item is assumed to be encoded in JSON using DynamoDB
|
||||
// conventions - each level of a nested document is a map with one key -
|
||||
// a type (e.g., "M" for map) - and its value is the representation of
|
||||
// that value.
|
||||
static rjson::value extract_path(const rjson::value* item,
        const parsed::path& p, calculate_value_caller caller) {
    // A missing item, or an item lacking the path's root attribute, yields
    // a null value rather than an error.
    if (!item) {
        return rjson::null_value();
    }
    const rjson::value* v = rjson::find(*item, p.root());
    if (!v) {
        return rjson::null_value();
    }
    // Walk the path one operator at a time. At the top of each iteration v
    // is expected to be a single-member object whose member name is the
    // type and whose member value is the payload.
    for (const auto& op : p.operators()) {
        if (!v->IsObject() || v->MemberCount() != 1) {
            // This shouldn't happen. We shouldn't have stored malformed
            // objects. But today Alternator does not validate the structure
            // of nested documents before storing them, so this can happen on
            // read.
            // The format string has two placeholders: the caller and the
            // offending item. Both must be passed.
            throw api_error::validation(format("{}: malformed item read: {}", caller, *item));
        }
        const char* type = v->MemberBegin()->name.GetString();
        v = &(v->MemberBegin()->value);
        std::visit(overloaded_functor {
            [&] (const std::string& member) {
                // A member operator only applies to a map ("M").
                if (type[0] == 'M' && v->IsObject()) {
                    v = rjson::find(*v, member);
                } else {
                    v = nullptr;
                }
            },
            [&] (unsigned index) {
                // An index operator only applies to a list ("L"), and the
                // index must be within the list's bounds.
                if (type[0] == 'L' && v->IsArray() && index < v->Size()) {
                    v = &(v->GetArray()[index]);
                } else {
                    v = nullptr;
                }
            }
        }, op);
        if (!v) {
            // The requested path does not exist in this item.
            return rjson::null_value();
        }
    }
    return rjson::copy(*v);
}
|
||||
|
||||
// Given a parsed::value, which can refer either to a constant value from
|
||||
// ExpressionAttributeValues, to the value of some attribute, or to a function
|
||||
// of other values, this function calculates the resulting value.
|
||||
@@ -732,12 +640,21 @@ rjson::value calculate_value(const parsed::value& v,
|
||||
auto function_it = function_handlers.find(std::string_view(f._function_name));
|
||||
if (function_it == function_handlers.end()) {
|
||||
throw api_error::validation(
|
||||
format("{}: unknown function '{}' called.", caller, f._function_name));
|
||||
format("UpdateExpression: unknown function '{}' called.", f._function_name));
|
||||
}
|
||||
return function_it->second(caller, previous_item, f);
|
||||
},
|
||||
[&] (const parsed::path& p) -> rjson::value {
|
||||
return extract_path(previous_item, p, caller);
|
||||
if (!previous_item) {
|
||||
return rjson::null_value();
|
||||
}
|
||||
std::string update_path = p.root();
|
||||
if (p.has_operators()) {
|
||||
// FIXME: support this
|
||||
throw api_error::validation("Reading attribute paths not yet implemented");
|
||||
}
|
||||
const rjson::value* previous_value = rjson::find(*previous_item, update_path);
|
||||
return previous_value ? rjson::copy(*previous_value) : rjson::null_value();
|
||||
}
|
||||
}, v._value);
|
||||
}
|
||||
|
||||
@@ -49,23 +49,15 @@ class path {
|
||||
// dot (e.g., ".xyz").
|
||||
std::string _root;
|
||||
std::vector<std::variant<std::string, unsigned>> _operators;
|
||||
// It is useful to limit the depth of a user-specified path, because it
|
||||
// allows us to use recursive algorithms without worrying about recursion
|
||||
// depth. DynamoDB officially limits the length of paths to 32 components
|
||||
// (including the root) so let's use the same limit.
|
||||
static constexpr unsigned depth_limit = 32;
|
||||
void check_depth_limit();
|
||||
public:
|
||||
void set_root(std::string root) {
|
||||
_root = std::move(root);
|
||||
}
|
||||
void add_index(unsigned i) {
|
||||
_operators.emplace_back(i);
|
||||
check_depth_limit();
|
||||
}
|
||||
void add_dot(std::string(name)) {
|
||||
_operators.emplace_back(std::move(name));
|
||||
check_depth_limit();
|
||||
}
|
||||
const std::string& root() const {
|
||||
return _root;
|
||||
@@ -73,13 +65,6 @@ public:
|
||||
bool has_operators() const {
|
||||
return !_operators.empty();
|
||||
}
|
||||
const std::vector<std::variant<std::string, unsigned>>& operators() const {
|
||||
return _operators;
|
||||
}
|
||||
std::vector<std::variant<std::string, unsigned>>& operators() {
|
||||
return _operators;
|
||||
}
|
||||
friend std::ostream& operator<<(std::ostream&, const path&);
|
||||
};
|
||||
|
||||
// When an expression is first parsed, all constants are references, like
|
||||
|
||||
@@ -22,8 +22,6 @@
|
||||
#include "alternator/server.hh"
|
||||
#include "log.hh"
|
||||
#include <seastar/http/function_handlers.hh>
|
||||
#include <seastar/http/short_streams.hh>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/json/json_elements.hh>
|
||||
#include "seastarx.hh"
|
||||
#include "error.hh"
|
||||
@@ -61,40 +59,6 @@ inline std::vector<std::string_view> split(std::string_view text, char separator
|
||||
return tokens;
|
||||
}
|
||||
|
||||
// Handle CORS (Cross-origin resource sharing) in the HTTP request:
|
||||
// If the request has the "Origin" header specifying where the script which
|
||||
// makes this request comes from, we need to reply with the header
|
||||
// "Access-Control-Allow-Origin: *" saying that this (and any) origin is fine.
|
||||
// Additionally, if preflight==true (i.e., this is an OPTIONS request),
|
||||
// the script can also "request" in headers that the server allows it to use
|
||||
// some HTTP methods and headers in the followup request, and the server
|
||||
// should respond by "allowing" them in the response headers.
|
||||
// We also add the header "Access-Control-Expose-Headers" to let the script
|
||||
// access additional headers in the response.
|
||||
// This handle_CORS() should be used when handling any HTTP method - both the
|
||||
// usual GET and POST, and also the "preflight" OPTIONS method.
|
||||
static void handle_CORS(const request& req, reply& rep, bool preflight) {
|
||||
if (!req.get_header("origin").empty()) {
|
||||
rep.add_header("Access-Control-Allow-Origin", "*");
|
||||
// This is the list that DynamoDB returns for expose headers. I am
|
||||
// not sure why not just return "*" here, what's the risk?
|
||||
rep.add_header("Access-Control-Expose-Headers", "x-amzn-RequestId,x-amzn-ErrorType,x-amzn-ErrorMessage,Date");
|
||||
if (preflight) {
|
||||
sstring s = req.get_header("Access-Control-Request-Headers");
|
||||
if (!s.empty()) {
|
||||
rep.add_header("Access-Control-Allow-Headers", std::move(s));
|
||||
}
|
||||
s = req.get_header("Access-Control-Request-Method");
|
||||
if (!s.empty()) {
|
||||
rep.add_header("Access-Control-Allow-Methods", std::move(s));
|
||||
}
|
||||
// Our CORS response never change anyway, let the browser cache it
|
||||
// for two hours (Chrome's maximum):
|
||||
rep.add_header("Access-Control-Max-Age", "7200");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DynamoDB HTTP error responses are structured as follows
|
||||
// https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Programming.Errors.html
|
||||
// Our handlers throw an exception to report an error. If the exception
|
||||
@@ -129,10 +93,6 @@ public:
|
||||
[&] (const json::json_return_type& json_return_value) {
|
||||
slogger.trace("api_handler success case");
|
||||
if (json_return_value._body_writer) {
|
||||
// Unfortunately, write_body() forces us to choose
|
||||
// from a fixed and irrelevant list of "mime-types"
|
||||
// at this point. But we'll override it with the
|
||||
// one (application/x-amz-json-1.0) below.
|
||||
rep->write_body("json", std::move(json_return_value._body_writer));
|
||||
} else {
|
||||
rep->_content += json_return_value._res;
|
||||
@@ -145,16 +105,14 @@ public:
|
||||
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
});
|
||||
}) { }
|
||||
}), _type("json") { }
|
||||
|
||||
api_handler(const api_handler&) = default;
|
||||
future<std::unique_ptr<reply>> handle(const sstring& path,
|
||||
std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
|
||||
handle_CORS(*req, *rep, false);
|
||||
return _f_handle(std::move(req), std::move(rep)).then(
|
||||
[this](std::unique_ptr<reply> rep) {
|
||||
rep->set_mime_type("application/x-amz-json-1.0");
|
||||
rep->done();
|
||||
rep->done(_type);
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
});
|
||||
}
|
||||
@@ -168,6 +126,7 @@ protected:
|
||||
}
|
||||
|
||||
future_handler_function _f_handle;
|
||||
sstring _type;
|
||||
};
|
||||
|
||||
class gated_handler : public handler_base {
|
||||
@@ -187,7 +146,6 @@ public:
|
||||
health_handler(seastar::gate& pending_requests) : gated_handler(pending_requests) {}
|
||||
protected:
|
||||
virtual future<std::unique_ptr<reply>> do_handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
|
||||
handle_CORS(*req, *rep, false);
|
||||
rep->set_status(reply::status_type::ok);
|
||||
rep->write_body("txt", format("healthy: {}", req->get_header("Host")));
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
@@ -220,23 +178,7 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
// The CORS (Cross-origin resource sharing) protocol can send an OPTIONS
|
||||
// request before ("pre-flight") the main request. The response to this
|
||||
// request can be empty, but needs to have the right headers (which we
|
||||
// fill with handle_CORS())
|
||||
class options_handler : public gated_handler {
|
||||
public:
|
||||
options_handler(seastar::gate& pending_requests) : gated_handler(pending_requests) {}
|
||||
protected:
|
||||
virtual future<std::unique_ptr<reply>> do_handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
|
||||
handle_CORS(*req, *rep, true);
|
||||
rep->set_status(reply::status_type::ok);
|
||||
rep->write_body("txt", sstring(""));
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
}
|
||||
};
|
||||
|
||||
future<> server::verify_signature(const request& req, const chunked_content& content) {
|
||||
future<> server::verify_signature(const request& req) {
|
||||
if (!_enforce_authorization) {
|
||||
slogger.debug("Skipping authorization");
|
||||
return make_ready_future<>();
|
||||
@@ -247,34 +189,27 @@ future<> server::verify_signature(const request& req, const chunked_content& con
|
||||
}
|
||||
auto authorization_it = req._headers.find("Authorization");
|
||||
if (authorization_it == req._headers.end()) {
|
||||
throw api_error::missing_authentication_token("Authorization header is mandatory for signature verification");
|
||||
throw api_error::invalid_signature("Authorization header is mandatory for signature verification");
|
||||
}
|
||||
std::string host = host_it->second;
|
||||
std::string_view authorization_header = authorization_it->second;
|
||||
auto pos = authorization_header.find_first_of(' ');
|
||||
if (pos == std::string_view::npos || authorization_header.substr(0, pos) != "AWS4-HMAC-SHA256") {
|
||||
throw api_error::invalid_signature(format("Authorization header must use AWS4-HMAC-SHA256 algorithm: {}", authorization_header));
|
||||
}
|
||||
authorization_header.remove_prefix(pos+1);
|
||||
std::vector<std::string_view> credentials_raw = split(authorization_it->second, ' ');
|
||||
std::string credential;
|
||||
std::string user_signature;
|
||||
std::string signed_headers_str;
|
||||
std::vector<std::string_view> signed_headers;
|
||||
do {
|
||||
// Either one of a comma or space can mark the end of an entry
|
||||
pos = authorization_header.find_first_of(" ,");
|
||||
std::string_view entry = authorization_header.substr(0, pos);
|
||||
if (pos != std::string_view::npos) {
|
||||
authorization_header.remove_prefix(pos + 1);
|
||||
}
|
||||
if (entry.empty()) {
|
||||
continue;
|
||||
}
|
||||
for (std::string_view entry : credentials_raw) {
|
||||
std::vector<std::string_view> entry_split = split(entry, '=');
|
||||
if (entry_split.size() != 2) {
|
||||
if (entry != "AWS4-HMAC-SHA256") {
|
||||
throw api_error::invalid_signature(format("Only AWS4-HMAC-SHA256 algorithm is supported. Found: {}", entry));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
std::string_view auth_value = entry_split[1];
|
||||
// Commas appear as an additional (quite redundant) delimiter
|
||||
if (auth_value.back() == ',') {
|
||||
auth_value.remove_suffix(1);
|
||||
}
|
||||
if (entry_split[0] == "Credential") {
|
||||
credential = std::string(auth_value);
|
||||
} else if (entry_split[0] == "Signature") {
|
||||
@@ -284,8 +219,7 @@ future<> server::verify_signature(const request& req, const chunked_content& con
|
||||
signed_headers = split(auth_value, ';');
|
||||
std::sort(signed_headers.begin(), signed_headers.end());
|
||||
}
|
||||
} while (pos != std::string_view::npos);
|
||||
|
||||
}
|
||||
std::vector<std::string_view> credential_split = split(credential, '/');
|
||||
if (credential_split.size() != 5) {
|
||||
throw api_error::validation(format("Incorrect credential information format: {}", credential));
|
||||
@@ -309,10 +243,10 @@ future<> server::verify_signature(const request& req, const chunked_content& con
|
||||
}
|
||||
}
|
||||
|
||||
auto cache_getter = [&qp = _qp] (std::string username) {
|
||||
return get_key_from_roles(qp, std::move(username));
|
||||
auto cache_getter = [] (std::string username) {
|
||||
return get_key_from_roles(cql3::get_query_processor().local(), std::move(username));
|
||||
};
|
||||
return _key_cache.get_ptr(user, cache_getter).then([this, &req, &content,
|
||||
return _key_cache.get_ptr(user, cache_getter).then([this, &req,
|
||||
user = std::move(user),
|
||||
host = std::move(host),
|
||||
datestamp = std::move(datestamp),
|
||||
@@ -322,7 +256,7 @@ future<> server::verify_signature(const request& req, const chunked_content& con
|
||||
service = std::move(service),
|
||||
user_signature = std::move(user_signature)] (key_cache::value_ptr key_ptr) {
|
||||
std::string signature = get_signature(user, *key_ptr, std::string_view(host), req._method,
|
||||
datestamp, signed_headers_str, signed_headers_map, content, region, service, "");
|
||||
datestamp, signed_headers_str, signed_headers_map, req.content, region, service, "");
|
||||
|
||||
if (signature != std::string_view(user_signature)) {
|
||||
_key_cache.remove(user);
|
||||
@@ -331,91 +265,43 @@ future<> server::verify_signature(const request& req, const chunked_content& con
|
||||
});
|
||||
}
|
||||
|
||||
// Create a QUERY-type tracing session on the given tracing instance. Full
// tracing is always requested; slow-query logging is requested only when
// the instance reports that slow-query tracing is enabled.
static tracing::trace_state_ptr create_tracing_session(tracing::tracing& tracing_instance) {
    using props_t = tracing::trace_state_props;
    const bool log_slow = tracing_instance.slow_query_tracing_enabled();
    tracing::trace_state_props_set session_props;
    session_props.set<props_t::full_tracing>();
    session_props.set_if<props_t::log_slow_query>(log_slow);
    return tracing_instance.create_session(tracing::trace_type::QUERY, session_props);
}
|
||||
|
||||
// truncated_content_view() prints a potentially long chunked_content for
|
||||
// debugging purposes. In the common case when the content is not excessively
|
||||
// long, it just returns a view into the given content, without any copying.
|
||||
// But when the content is very long, it is truncated after some arbitrary
|
||||
// max_len (or one chunk, whichever comes first), with "<truncated>" added at
|
||||
// the end. To do this modification to the string, we need to create a new
|
||||
// std::string, so the caller must pass us a reference to one, "buf", where
|
||||
// we can store the content. The returned view is only alive for as long as this
|
||||
// buf is kept alive.
|
||||
static std::string_view truncated_content_view(const chunked_content& content, std::string& buf) {
|
||||
constexpr size_t max_len = 1024;
|
||||
if (content.empty()) {
|
||||
return std::string_view();
|
||||
} else if (content.size() == 1 && content.begin()->size() <= max_len) {
|
||||
return std::string_view(content.begin()->get(), content.begin()->size());
|
||||
} else {
|
||||
buf = std::string(content.begin()->get(), std::min(content.begin()->size(), max_len)) + "<truncated>";
|
||||
return std::string_view(buf);
|
||||
}
|
||||
}
|
||||
|
||||
// Start a tracing session for this request if either the local tracing
// instance asks for the next query to be traced, or slow-query tracing is
// enabled. The session records the operation name as the "alternator_op"
// parameter and a (possibly truncated) view of the request body as the
// query. Otherwise a default-constructed trace_state_ptr is returned.
static tracing::trace_state_ptr maybe_trace_query(service::client_state& client_state, sstring_view op, const chunked_content& query) {
    tracing::tracing& local_tracing = tracing::tracing::get_local_tracing_instance();
    if (!local_tracing.trace_next_query() && !local_tracing.slow_query_tracing_enabled()) {
        return tracing::trace_state_ptr();
    }
    tracing::trace_state_ptr session = create_tracing_session(local_tracing);
    // Backing storage for the truncated view, in case truncation is needed.
    std::string scratch;
    tracing::add_session_param(session, "alternator_op", op);
    tracing::add_query(session, truncated_content_view(query, scratch));
    tracing::begin(session, format("Alternator {}", op), client_state.get_client_address());
    return session;
}
|
||||
|
||||
future<executor::request_return_type> server::handle_api_request(std::unique_ptr<request> req) {
|
||||
future<executor::request_return_type> server::handle_api_request(std::unique_ptr<request>&& req) {
|
||||
_executor._stats.total_operations++;
|
||||
sstring target = req->get_header(TARGET);
|
||||
std::vector<std::string_view> split_target = split(target, '.');
|
||||
//NOTICE(sarna): Target consists of Dynamo API version followed by a dot '.' and operation type (e.g. CreateTable)
|
||||
std::string op = split_target.empty() ? std::string() : std::string(split_target.back());
|
||||
// JSON parsing can allocate up to roughly 2x the size of the raw
|
||||
// document, + a couple of bytes for maintenance.
|
||||
// TODO: consider the case where req->content_length is missing. Maybe
|
||||
// we need to take the content_length_limit and return some of the units
|
||||
// when we finish read_content_and_verify_signature?
|
||||
size_t mem_estimate = req->content_length * 2 + 8000;
|
||||
auto units_fut = get_units(*_memory_limiter, mem_estimate);
|
||||
if (_memory_limiter->waiters()) {
|
||||
++_executor._stats.requests_blocked_memory;
|
||||
}
|
||||
auto units = co_await std::move(units_fut);
|
||||
assert(req->content_stream);
|
||||
chunked_content content = co_await httpd::read_entire_stream(*req->content_stream);
|
||||
co_await verify_signature(*req, content);
|
||||
|
||||
if (slogger.is_enabled(log_level::trace)) {
|
||||
std::string buf;
|
||||
slogger.trace("Request: {} {} {}", op, truncated_content_view(content, buf), req->_headers);
|
||||
}
|
||||
auto callback_it = _callbacks.find(op);
|
||||
if (callback_it == _callbacks.end()) {
|
||||
_executor._stats.unsupported_operations++;
|
||||
co_return api_error::unknown_operation(format("Unsupported operation {}", op));
|
||||
}
|
||||
if (_pending_requests.get_count() >= _max_concurrent_requests) {
|
||||
_executor._stats.requests_shed++;
|
||||
co_return api_error::request_limit_exceeded(format("too many in-flight requests (configured via max_concurrent_requests_per_shard): {}", _pending_requests.get_count()));
|
||||
}
|
||||
_pending_requests.enter();
|
||||
auto leave = defer([this] { _pending_requests.leave(); });
|
||||
//FIXME: Client state can provide more context, e.g. client's endpoint address
|
||||
// We use unique_ptr because client_state cannot be moved or copied
|
||||
executor::client_state client_state{executor::client_state::internal_tag()};
|
||||
tracing::trace_state_ptr trace_state = maybe_trace_query(client_state, op, content);
|
||||
tracing::trace(trace_state, op);
|
||||
rjson::value json_request = co_await _json_parser.parse(std::move(content));
|
||||
co_return co_await callback_it->second(_executor, client_state, trace_state,
|
||||
make_service_permit(std::move(units)), std::move(json_request), std::move(req));
|
||||
slogger.trace("Request: {} {} {}", op, req->content, req->_headers);
|
||||
return verify_signature(*req).then([this, op, req = std::move(req)] () mutable {
|
||||
auto callback_it = _callbacks.find(op);
|
||||
if (callback_it == _callbacks.end()) {
|
||||
_executor._stats.unsupported_operations++;
|
||||
throw api_error::unknown_operation(format("Unsupported operation {}", op));
|
||||
}
|
||||
return with_gate(_pending_requests, [this, callback_it = std::move(callback_it), op = std::move(op), req = std::move(req)] () mutable {
|
||||
//FIXME: Client state can provide more context, e.g. client's endpoint address
|
||||
// We use unique_ptr because client_state cannot be moved or copied
|
||||
return do_with(std::make_unique<executor::client_state>(executor::client_state::internal_tag()),
|
||||
[this, callback_it = std::move(callback_it), op = std::move(op), req = std::move(req)] (std::unique_ptr<executor::client_state>& client_state) mutable {
|
||||
tracing::trace_state_ptr trace_state = executor::maybe_trace_query(*client_state, op, req->content);
|
||||
tracing::trace(trace_state, op);
|
||||
// JSON parsing can allocate up to roughly 2x the size of the raw document, + a couple of bytes for maintenance.
|
||||
// FIXME: by this time, the whole HTTP request was already read, so some memory is already occupied.
|
||||
// Once HTTP allows working on streams, we should grab the permit *before* reading the HTTP payload.
|
||||
size_t mem_estimate = req->content.size() * 3 + 8000;
|
||||
auto units_fut = get_units(*_memory_limiter, mem_estimate);
|
||||
if (_memory_limiter->waiters()) {
|
||||
++_executor._stats.requests_blocked_memory;
|
||||
}
|
||||
return units_fut.then([this, callback_it = std::move(callback_it), &client_state, trace_state, req = std::move(req)] (semaphore_units<> units) mutable {
|
||||
return _json_parser.parse(req->content).then([this, callback_it = std::move(callback_it), &client_state, trace_state,
|
||||
units = std::move(units), req = std::move(req)] (rjson::value json_request) mutable {
|
||||
return callback_it->second(_executor, *client_state, trace_state, make_service_permit(std::move(units)), std::move(json_request), std::move(req)).finally([trace_state] {});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void server::set_routes(routes& r) {
|
||||
@@ -437,17 +323,15 @@ void server::set_routes(routes& r) {
|
||||
// scan an entire subnet for nodes responding to the health request,
|
||||
// or even just scan for open ports.
|
||||
r.put(operation_type::GET, "/localnodes", new local_nodelist_handler(_pending_requests));
|
||||
r.put(operation_type::OPTIONS, "/", new options_handler(_pending_requests));
|
||||
}
|
||||
|
||||
//FIXME: A way to immediately invalidate the cache should be considered,
|
||||
// e.g. when the system table which stores the keys is changed.
|
||||
// For now, this propagation may take up to 1 minute.
|
||||
server::server(executor& exec, cql3::query_processor& qp)
|
||||
server::server(executor& exec)
|
||||
: _http_server("http-alternator")
|
||||
, _https_server("https-alternator")
|
||||
, _executor(exec)
|
||||
, _qp(qp)
|
||||
, _key_cache(1024, 1min, slogger)
|
||||
, _enforce_authorization(false)
|
||||
, _enabled_servers{}
|
||||
@@ -520,10 +404,9 @@ server::server(executor& exec, cql3::query_processor& qp)
|
||||
}
|
||||
|
||||
future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
|
||||
bool enforce_authorization, semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests) {
|
||||
bool enforce_authorization, semaphore* memory_limiter) {
|
||||
_memory_limiter = memory_limiter;
|
||||
_enforce_authorization = enforce_authorization;
|
||||
_max_concurrent_requests = std::move(max_concurrent_requests);
|
||||
if (!port && !https_port) {
|
||||
return make_exception_future<>(std::runtime_error("Either regular port or TLS port"
|
||||
" must be specified in order to init an alternator HTTP server instance"));
|
||||
@@ -535,14 +418,12 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
|
||||
if (port) {
|
||||
set_routes(_http_server._routes);
|
||||
_http_server.set_content_length_limit(server::content_length_limit);
|
||||
_http_server.set_content_streaming(true);
|
||||
_http_server.listen(socket_address{addr, *port}).get();
|
||||
_enabled_servers.push_back(std::ref(_http_server));
|
||||
}
|
||||
if (https_port) {
|
||||
set_routes(_https_server._routes);
|
||||
_https_server.set_content_length_limit(server::content_length_limit);
|
||||
_https_server.set_content_streaming(true);
|
||||
_https_server.set_tls_credentials(creds->build_reloadable_server_credentials([](const std::unordered_set<sstring>& files, std::exception_ptr ep) {
|
||||
if (ep) {
|
||||
slogger.warn("Exception loading {}: {}", files, ep);
|
||||
@@ -580,7 +461,7 @@ server::json_parser::json_parser() : _run_parse_json_thread(async([this] {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
_parsed_document = rjson::parse_yieldable(std::move(_raw_document));
|
||||
_parsed_document = rjson::parse_yieldable(_raw_document);
|
||||
_current_exception = nullptr;
|
||||
} catch (...) {
|
||||
_current_exception = std::current_exception();
|
||||
@@ -590,12 +471,12 @@ server::json_parser::json_parser() : _run_parse_json_thread(async([this] {
|
||||
})) {
|
||||
}
|
||||
|
||||
future<rjson::value> server::json_parser::parse(chunked_content&& content) {
|
||||
future<rjson::value> server::json_parser::parse(std::string_view content) {
|
||||
if (content.size() < yieldable_parsing_threshold) {
|
||||
return make_ready_future<rjson::value>(rjson::parse(std::move(content)));
|
||||
return make_ready_future<rjson::value>(rjson::parse(content));
|
||||
}
|
||||
return with_semaphore(_parsing_sem, 1, [this, content = std::move(content)] () mutable {
|
||||
_raw_document = std::move(content);
|
||||
return with_semaphore(_parsing_sem, 1, [this, content] {
|
||||
_raw_document = content;
|
||||
_document_waiting.signal();
|
||||
return _document_parsed.wait().then([this] {
|
||||
if (_current_exception) {
|
||||
|
||||
@@ -28,13 +28,10 @@
|
||||
#include <optional>
|
||||
#include "alternator/auth.hh"
|
||||
#include "utils/small_vector.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
#include <seastar/core/units.hh>
|
||||
|
||||
namespace alternator {
|
||||
|
||||
using chunked_content = rjson::chunked_content;
|
||||
|
||||
class server {
|
||||
static constexpr size_t content_length_limit = 16*MB;
|
||||
using alternator_callback = std::function<future<executor::request_return_type>(executor&, executor::client_state&,
|
||||
@@ -44,7 +41,6 @@ class server {
|
||||
http_server _http_server;
|
||||
http_server _https_server;
|
||||
executor& _executor;
|
||||
cql3::query_processor& _qp;
|
||||
|
||||
key_cache _key_cache;
|
||||
bool _enforce_authorization;
|
||||
@@ -53,11 +49,10 @@ class server {
|
||||
alternator_callbacks_map _callbacks;
|
||||
|
||||
semaphore* _memory_limiter;
|
||||
utils::updateable_value<uint32_t> _max_concurrent_requests;
|
||||
|
||||
class json_parser {
|
||||
static constexpr size_t yieldable_parsing_threshold = 16*KB;
|
||||
chunked_content _raw_document;
|
||||
std::string_view _raw_document;
|
||||
rjson::value _parsed_document;
|
||||
std::exception_ptr _current_exception;
|
||||
semaphore _parsing_sem{1};
|
||||
@@ -67,24 +62,21 @@ class server {
|
||||
future<> _run_parse_json_thread;
|
||||
public:
|
||||
json_parser();
|
||||
// Moving a chunked_content into parse() allows parse() to free each
|
||||
// chunk as soon as it is parsed, so when chunks are relatively small,
|
||||
// we don't need to store the sum of unparsed and parsed sizes.
|
||||
future<rjson::value> parse(chunked_content&& content);
|
||||
future<rjson::value> parse(std::string_view content);
|
||||
future<> stop();
|
||||
};
|
||||
json_parser _json_parser;
|
||||
|
||||
public:
|
||||
server(executor& executor, cql3::query_processor& qp);
|
||||
server(executor& executor);
|
||||
|
||||
future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
|
||||
bool enforce_authorization, semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests);
|
||||
bool enforce_authorization, semaphore* memory_limiter);
|
||||
future<> stop();
|
||||
private:
|
||||
void set_routes(seastar::httpd::routes& r);
|
||||
future<> verify_signature(const seastar::httpd::request&, const chunked_content&);
|
||||
future<executor::request_return_type> handle_api_request(std::unique_ptr<request> req);
|
||||
future<> verify_signature(const seastar::httpd::request& r);
|
||||
future<executor::request_return_type> handle_api_request(std::unique_ptr<request>&& req);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -38,7 +38,6 @@ stats::stats() : api_operations{} {
|
||||
#define OPERATION_LATENCY(name, CamelCaseName) \
|
||||
seastar::metrics::make_histogram("op_latency", \
|
||||
seastar::metrics::description("Latency histogram of an operation via Alternator API"), {op(CamelCaseName)}, [this]{return to_metrics_histogram(api_operations.name);}),
|
||||
OPERATION(batch_get_item, "BatchGetItem")
|
||||
OPERATION(batch_write_item, "BatchWriteItem")
|
||||
OPERATION(create_backup, "CreateBackup")
|
||||
OPERATION(create_global_table, "CreateGlobalTable")
|
||||
@@ -97,8 +96,6 @@ stats::stats() : api_operations{} {
|
||||
seastar::metrics::description("number writes that had to be bounced from this shard because of LWT requirements")),
|
||||
seastar::metrics::make_total_operations("requests_blocked_memory", requests_blocked_memory,
|
||||
seastar::metrics::description("Counts a number of requests blocked due to memory pressure.")),
|
||||
seastar::metrics::make_total_operations("requests_shed", requests_shed,
|
||||
seastar::metrics::description("Counts a number of requests shed due to overload.")),
|
||||
seastar::metrics::make_total_operations("filtered_rows_read_total", cql_stats.filtered_rows_read_total,
|
||||
seastar::metrics::description("number of rows read during filtering operations")),
|
||||
seastar::metrics::make_total_operations("filtered_rows_matched_total", cql_stats.filtered_rows_matched_total,
|
||||
|
||||
@@ -92,7 +92,6 @@ public:
|
||||
uint64_t write_using_lwt = 0;
|
||||
uint64_t shard_bounce_for_lwt = 0;
|
||||
uint64_t requests_blocked_memory = 0;
|
||||
uint64_t requests_shed = 0;
|
||||
// CQL-derived stats
|
||||
cql3::cql_stats cql_stats;
|
||||
private:
|
||||
|
||||
@@ -34,7 +34,6 @@
|
||||
#include "cdc/log.hh"
|
||||
#include "cdc/generation.hh"
|
||||
#include "cdc/cdc_options.hh"
|
||||
#include "cdc/metadata.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "utils/UUID_gen.hh"
|
||||
#include "cql3/selection/selection.hh"
|
||||
@@ -291,9 +290,7 @@ struct sequence_number {
|
||||
sequence_number::sequence_number(std::string_view v)
|
||||
: uuid([&] {
|
||||
using namespace boost::multiprecision;
|
||||
// workaround for weird clang 10 bug when calling constructor with
|
||||
// view directly.
|
||||
uint128_t tmp{std::string(v)};
|
||||
uint128_t tmp{v};
|
||||
// see above
|
||||
return utils::UUID_gen::get_time_UUID_raw(uint64_t(tmp >> 64), uint64_t(tmp & std::numeric_limits<uint64_t>::max()));
|
||||
}())
|
||||
@@ -471,14 +468,13 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
auto status = "DISABLED";
|
||||
|
||||
if (opts.enabled()) {
|
||||
if (!_cdc_metadata.streams_available()) {
|
||||
auto& metadata = _ss.get_cdc_metadata();
|
||||
if (!metadata.streams_available()) {
|
||||
status = "ENABLING";
|
||||
} else {
|
||||
status = "ENABLED";
|
||||
}
|
||||
}
|
||||
|
||||
auto ttl = std::chrono::seconds(opts.ttl());
|
||||
|
||||
rjson::set(stream_desc, "StreamStatus", rjson::from_string(status));
|
||||
|
||||
@@ -498,12 +494,20 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
// TODO: label
|
||||
// TODO: creation time
|
||||
|
||||
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
|
||||
const auto& tm = _proxy.get_token_metadata();
|
||||
// cannot really "resume" the query; we must iterate over all data, because we can query neither on "time" (pk) > something,
|
||||
// nor on expired...
|
||||
// TODO: maybe add secondary index to topology table to enable this?
|
||||
return _sdks.cdc_get_versioned_streams({ tm.count_normal_token_owners() }).then([this, &db, schema, shard_start, limit, ret = std::move(ret), stream_desc = std::move(stream_desc)](std::map<db_clock::time_point, cdc::streams_version> topologies) mutable {
|
||||
|
||||
// filter out cdc generations older than the table or now() - cdc::ttl (typically dynamodb_streams_max_window - 24h)
|
||||
auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - ttl);
|
||||
// filter out cdc generations older than the table or now() - dynamodb_streams_max_window (24h)
|
||||
auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - dynamodb_streams_max_window);
|
||||
|
||||
return _sdks.cdc_get_versioned_streams(low_ts, { normal_token_owners }).then([this, &db, shard_start, limit, ret = std::move(ret), stream_desc = std::move(stream_desc)] (std::map<db_clock::time_point, cdc::streams_version> topologies) mutable {
|
||||
auto i = topologies.lower_bound(low_ts);
|
||||
// need first gen _intersecting_ the timestamp.
|
||||
if (i != topologies.begin()) {
|
||||
i = std::prev(i);
|
||||
}
|
||||
|
||||
auto e = topologies.end();
|
||||
auto prev = e;
|
||||
@@ -511,7 +515,9 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
|
||||
std::optional<shard_id> last;
|
||||
|
||||
auto i = topologies.begin();
|
||||
// i is now at the youngest generation we include. make a mark of it.
|
||||
auto first = i;
|
||||
|
||||
// if we're a paged query, skip to the generation where we left off.
|
||||
if (shard_start) {
|
||||
i = topologies.find(shard_start->time);
|
||||
@@ -537,7 +543,7 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
};
|
||||
|
||||
// need a prev even if we are skipping stuff
|
||||
if (i != topologies.begin()) {
|
||||
if (i != first) {
|
||||
prev = std::prev(i);
|
||||
}
|
||||
|
||||
@@ -845,18 +851,16 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
static const bytes op_column_name = cdc::log_meta_column_name_bytes("operation");
|
||||
static const bytes eor_column_name = cdc::log_meta_column_name_bytes("end_of_batch");
|
||||
|
||||
auto key_names = boost::copy_range<attrs_to_get>(
|
||||
auto key_names = boost::copy_range<std::unordered_set<std::string>>(
|
||||
boost::range::join(std::move(base->partition_key_columns()), std::move(base->clustering_key_columns()))
|
||||
| boost::adaptors::transformed([&] (const column_definition& cdef) {
|
||||
return std::make_pair<std::string, attrs_to_get_node>(cdef.name_as_text(), {}); })
|
||||
| boost::adaptors::transformed([&] (const column_definition& cdef) { return cdef.name_as_text(); })
|
||||
);
|
||||
// Include all base table columns as values (in case pre or post is enabled).
|
||||
// This will include attributes not stored in the frozen map column
|
||||
auto attr_names = boost::copy_range<attrs_to_get>(base->regular_columns()
|
||||
auto attr_names = boost::copy_range<std::unordered_set<std::string>>(base->regular_columns()
|
||||
// this will include the :attrs column, which we will also force evaluating.
|
||||
// But not having this set empty forces out any cdc columns from actual result
|
||||
| boost::adaptors::transformed([] (const column_definition& cdef) {
|
||||
return std::make_pair<std::string, attrs_to_get_node>(cdef.name_as_text(), {}); })
|
||||
| boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.name_as_text(); })
|
||||
);
|
||||
|
||||
std::vector<const column_definition*> columns;
|
||||
@@ -879,17 +883,8 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
auto partition_slice = query::partition_slice(
|
||||
std::move(bounds)
|
||||
, {}, std::move(regular_columns), selection->get_query_options());
|
||||
|
||||
auto& opts = base->cdc_options();
|
||||
auto mul = 2; // key-only, allow for delete + insert
|
||||
if (opts.preimage()) {
|
||||
++mul;
|
||||
}
|
||||
if (opts.postimage()) {
|
||||
++mul;
|
||||
}
|
||||
auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, _proxy.get_max_result_size(partition_slice),
|
||||
query::row_limit(limit * mul));
|
||||
query::row_limit(limit * 4));
|
||||
|
||||
return _proxy.query(schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), std::move(permit), client_state)).then(
|
||||
[this, schema, partition_slice = std::move(partition_slice), selection = std::move(selection), start_time = std::move(start_time), limit, key_names = std::move(key_names), attr_names = std::move(attr_names), type, iter, high_ts] (service::storage_proxy::coordinator_query_result qr) mutable {
|
||||
@@ -1020,9 +1015,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
}
|
||||
|
||||
// ugh. figure out if we are at an end-of-shard
|
||||
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
|
||||
|
||||
return _sdks.cdc_current_generation_timestamp({ normal_token_owners }).then([this, iter, high_ts, start_time, ret = std::move(ret)](db_clock::time_point ts) mutable {
|
||||
return cdc::get_local_streams_timestamp().then([this, iter, high_ts, start_time, ret = std::move(ret)](db_clock::time_point ts) mutable {
|
||||
auto& shard = iter.shard;
|
||||
|
||||
if (shard.time < ts && ts < high_ts) {
|
||||
|
||||
@@ -2925,10 +2925,6 @@
|
||||
"id":"toppartitions_query_results",
|
||||
"description":"nodetool toppartitions query results",
|
||||
"properties":{
|
||||
"read_cardinality":{
|
||||
"type":"long",
|
||||
"description":"Number of the unique operations in the sample set"
|
||||
},
|
||||
"read":{
|
||||
"type":"array",
|
||||
"items":{
|
||||
@@ -2936,10 +2932,6 @@
|
||||
},
|
||||
"description":"Read results"
|
||||
},
|
||||
"write_cardinality":{
|
||||
"type":"long",
|
||||
"description":"Number of the unique operations in the sample set"
|
||||
},
|
||||
"write":{
|
||||
"type":"array",
|
||||
"items":{
|
||||
|
||||
@@ -148,30 +148,6 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/gossiper/force_remove_endpoint/{addr}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Force remove an endpoint from gossip",
|
||||
"type":"void",
|
||||
"nickname":"force_remove_endpoint",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"addr",
|
||||
"description":"The endpoint address",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@
|
||||
"items":{
|
||||
"type":"message_counter"
|
||||
},
|
||||
"nickname":"get_replied_messages",
|
||||
"nickname":"get_completed_messages",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
|
||||
@@ -68,7 +68,7 @@
|
||||
"summary":"Get the hinted handoff enabled by dc",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"array"
|
||||
"type":"mapper_list"
|
||||
},
|
||||
"nickname":"get_hinted_handoff_enabled_by_dc",
|
||||
"produces":[
|
||||
|
||||
@@ -104,68 +104,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/storage_service/toppartitions/",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Toppartitions query",
|
||||
"type":"toppartitions_query_results",
|
||||
"nickname":"toppartitions_generic",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"table_filters",
|
||||
"description":"Optional list of table name filters in keyspace:name format",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"string"
|
||||
},
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"keyspace_filters",
|
||||
"description":"Optional list of keyspace filters",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"string"
|
||||
},
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"duration",
|
||||
"description":"Duration (in milliseconds) of monitoring operation",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type": "long",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"list_size",
|
||||
"description":"number of the top partitions to list",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type": "long",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"capacity",
|
||||
"description":"capacity of stream summary: determines amount of resources used in query processing",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type": "long",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/storage_service/nodes/leaving",
|
||||
"operations":[
|
||||
@@ -1032,14 +970,6 @@
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"ignore_nodes",
|
||||
"description":"Which hosts are to ignore in this repair. Multiple hosts can be listed separated by commas.",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"trace",
|
||||
"description":"If the value is the string 'true' with any capitalization, enable tracing of the repair.",
|
||||
@@ -1175,14 +1105,6 @@
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"ignore_nodes",
|
||||
"description":"List of dead nodes to ignore in removenode operation",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1834,22 +1756,6 @@
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"load_and_stream",
|
||||
"description":"Load the sstables and stream to all replica nodes that owns the data",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"primary_replica_only",
|
||||
"description":"Load the sstables and stream to primary replica node that owns the data. Repair is needed after the load and stream process",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1960,14 +1866,6 @@
|
||||
"allowMultiple":false,
|
||||
"type":"long",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"fast",
|
||||
"description":"Lightweight tracing mode: if true, slow queries tracing records only session headers",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"boolean",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -2466,10 +2364,6 @@
|
||||
"threshold":{
|
||||
"type":"long",
|
||||
"description":"The slow query logging threshold in microseconds. Queries that takes longer, will be logged"
|
||||
},
|
||||
"fast":{
|
||||
"type":"boolean",
|
||||
"description":"Is lightweight tracing mode enabled. In that mode tracing ignore events and tracks only sessions."
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -52,22 +52,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/system/drop_sstable_caches",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Drop in-memory caches for data which is in sstables",
|
||||
"type":"void",
|
||||
"nickname":"drop_sstable_caches",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/system/uptime_ms",
|
||||
"operations":[
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#include <seastar/http/httpd.hh>
|
||||
|
||||
namespace service { class load_meter; }
|
||||
namespace locator { class shared_token_metadata; }
|
||||
namespace locator { class token_metadata; }
|
||||
namespace cql_transport { class controller; }
|
||||
class thrift_controller;
|
||||
namespace db { class snapshot_ctl; }
|
||||
@@ -39,15 +39,13 @@ struct http_context {
|
||||
distributed<database>& db;
|
||||
distributed<service::storage_proxy>& sp;
|
||||
service::load_meter& lmeter;
|
||||
const sharded<locator::shared_token_metadata>& shared_token_metadata;
|
||||
const sharded<locator::token_metadata>& token_metadata;
|
||||
|
||||
http_context(distributed<database>& _db,
|
||||
distributed<service::storage_proxy>& _sp,
|
||||
service::load_meter& _lm, const sharded<locator::shared_token_metadata>& _stm)
|
||||
: db(_db), sp(_sp), lmeter(_lm), shared_token_metadata(_stm) {
|
||||
service::load_meter& _lm, const sharded<locator::token_metadata>& _tm)
|
||||
: db(_db), sp(_sp), lmeter(_lm), token_metadata(_tm) {
|
||||
}
|
||||
|
||||
const locator::token_metadata& get_token_metadata();
|
||||
};
|
||||
|
||||
future<> set_server_init(http_context& ctx);
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <algorithm>
|
||||
#include "db/system_keyspace_view_types.hh"
|
||||
#include "db/data_listeners.hh"
|
||||
#include "storage_service.hh"
|
||||
|
||||
extern logging::logger apilog;
|
||||
|
||||
@@ -181,7 +180,7 @@ static future<json::json_return_type> get_cf_unleveled_sstables(http_context& ct
|
||||
|
||||
static int64_t min_partition_size(column_family& cf) {
|
||||
int64_t res = INT64_MAX;
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
for (auto i: *cf.get_sstables() ) {
|
||||
res = std::min(res, i->get_stats_metadata().estimated_partition_size.min());
|
||||
}
|
||||
return (res == INT64_MAX) ? 0 : res;
|
||||
@@ -189,7 +188,7 @@ static int64_t min_partition_size(column_family& cf) {
|
||||
|
||||
static int64_t max_partition_size(column_family& cf) {
|
||||
int64_t res = 0;
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
for (auto i: *cf.get_sstables() ) {
|
||||
res = std::max(i->get_stats_metadata().estimated_partition_size.max(), res);
|
||||
}
|
||||
return res;
|
||||
@@ -197,7 +196,7 @@ static int64_t max_partition_size(column_family& cf) {
|
||||
|
||||
static integral_ratio_holder mean_partition_size(column_family& cf) {
|
||||
integral_ratio_holder res;
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
for (auto i: *cf.get_sstables() ) {
|
||||
auto c = i->get_stats_metadata().estimated_partition_size.count();
|
||||
res.sub += i->get_stats_metadata().estimated_partition_size.mean() * c;
|
||||
res.total += c;
|
||||
@@ -275,7 +274,7 @@ public:
|
||||
|
||||
static double get_compression_ratio(column_family& cf) {
|
||||
sum_ratio<double> result;
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
for (auto i : *cf.get_sstables()) {
|
||||
auto compression_ratio = i->get_compression_ratio();
|
||||
if (compression_ratio != sstables::metadata_collector::NO_COMPRESSION_RATIO) {
|
||||
result(compression_ratio);
|
||||
@@ -311,8 +310,8 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
return res;
|
||||
});
|
||||
|
||||
cf::get_column_family.set(r, [&ctx] (std::unique_ptr<request> req){
|
||||
std::list<cf::column_family_info> res;
|
||||
cf::get_column_family.set(r, [&ctx] (const_req req){
|
||||
vector<cf::column_family_info> res;
|
||||
for (auto i: ctx.db.local().get_column_families_mapping()) {
|
||||
cf::column_family_info info;
|
||||
info.ks = i.first.first;
|
||||
@@ -320,7 +319,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
info.type = "ColumnFamilies";
|
||||
res.push_back(info);
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(json::stream_range_as_array(std::move(res), std::identity()));
|
||||
return res;
|
||||
});
|
||||
|
||||
cf::get_column_family_name_keyspace.set(r, [&ctx] (const_req req){
|
||||
@@ -425,7 +424,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_estimated_row_size_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
|
||||
utils::estimated_histogram res(0);
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
for (auto i: *cf.get_sstables() ) {
|
||||
res.merge(i->get_stats_metadata().estimated_partition_size);
|
||||
}
|
||||
return res;
|
||||
@@ -437,7 +436,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) {
|
||||
uint64_t res = 0;
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
for (auto i: *cf.get_sstables() ) {
|
||||
res += i->get_stats_metadata().estimated_partition_size.count();
|
||||
}
|
||||
return res;
|
||||
@@ -448,7 +447,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_estimated_column_count_histogram.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](column_family& cf) {
|
||||
utils::estimated_histogram res(0);
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
for (auto i: *cf.get_sstables() ) {
|
||||
res.merge(i->get_stats_metadata().estimated_cells_count);
|
||||
}
|
||||
return res;
|
||||
@@ -600,8 +599,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_get_false_positive();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -609,8 +607,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_all_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_get_false_positive();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -618,8 +615,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_get_recent_false_positive();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -627,8 +623,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_all_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_get_recent_false_positive();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -660,8 +655,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_size();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -669,8 +663,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_all_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_size();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -678,8 +671,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_memory_size();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -687,8 +679,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_all_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_memory_size();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -696,8 +687,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->get_summary().memory_footprint();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -705,8 +695,7 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->get_summary().memory_footprint();
|
||||
});
|
||||
}, std::plus<uint64_t>());
|
||||
@@ -984,20 +973,42 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
cf::toppartitions.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
auto name = req->param["name"];
|
||||
auto [ks, cf] = parse_fully_qualified_cf_name(name);
|
||||
auto name_param = req->param["name"];
|
||||
auto [ks, cf] = parse_fully_qualified_cf_name(name_param);
|
||||
|
||||
api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
|
||||
api::req_param<unsigned> capacity(*req, "capacity", 256);
|
||||
api::req_param<unsigned> list_size(*req, "list_size", 10);
|
||||
|
||||
apilog.info("toppartitions query: name={} duration={} list_size={} capacity={}",
|
||||
name, duration.param, list_size.param, capacity.param);
|
||||
name_param, duration.param, list_size.param, capacity.param);
|
||||
|
||||
return seastar::do_with(db::toppartitions_query(ctx.db, {{ks, cf}}, {}, duration.value, list_size, capacity), [&ctx] (db::toppartitions_query& q) {
|
||||
return run_toppartitions_query(q, ctx, true);
|
||||
return seastar::do_with(db::toppartitions_query(ctx.db, ks, cf, duration.value, list_size, capacity), [&ctx](auto& q) {
|
||||
return q.scatter().then([&q] {
|
||||
return sleep(q.duration()).then([&q] {
|
||||
return q.gather(q.capacity()).then([&q] (auto topk_results) {
|
||||
apilog.debug("toppartitions query: processing results");
|
||||
cf::toppartitions_query_results results;
|
||||
|
||||
for (auto& d: topk_results.read.top(q.list_size())) {
|
||||
cf::toppartitions_record r;
|
||||
r.partition = sstring(d.item);
|
||||
r.count = d.count;
|
||||
r.error = d.error;
|
||||
results.read.push(r);
|
||||
}
|
||||
for (auto& d: topk_results.write.top(q.list_size())) {
|
||||
cf::toppartitions_record r;
|
||||
r.partition = sstring(d.item);
|
||||
r.count = d.count;
|
||||
r.error = d.error;
|
||||
results.write.push(r);
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(results);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -116,7 +116,4 @@ future<json::json_return_type> get_cf_stats(http_context& ctx, const sstring& n
|
||||
future<json::json_return_type> get_cf_stats(http_context& ctx,
|
||||
int64_t column_family_stats::*f);
|
||||
|
||||
|
||||
std::tuple<sstring, sstring> parse_fully_qualified_cf_name(sstring name);
|
||||
|
||||
}
|
||||
|
||||
@@ -58,7 +58,6 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
||||
|
||||
for (const auto& c : cm.get_compactions()) {
|
||||
cm::summary s;
|
||||
s.id = c->compaction_uuid.to_sstring();
|
||||
s.ks = c->ks_name;
|
||||
s.cf = c->cf_name;
|
||||
s.unit = "keys";
|
||||
|
||||
@@ -66,13 +66,6 @@ void set_gossiper(http_context& ctx, routes& r) {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
httpd::gossiper_json::force_remove_endpoint.set(r, [](std::unique_ptr<request> req) {
|
||||
gms::inet_address ep(req->param["addr"]);
|
||||
return gms::get_local_gossiper().force_remove_endpoint(ep).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
#include <seastar/http/exception.hh>
|
||||
#include "utils/logalloc.hh"
|
||||
#include "log.hh"
|
||||
#include "database.hh"
|
||||
|
||||
namespace api {
|
||||
|
||||
|
||||
@@ -96,10 +96,6 @@ void set_messaging_service(http_context& ctx, routes& r, sharded<netw::messaging
|
||||
return c.get_stats().sent_messages;
|
||||
}));
|
||||
|
||||
get_replied_messages.set(r, get_client_getter(ms, [](const shard_info& c) {
|
||||
return c.get_stats().replied;
|
||||
}));
|
||||
|
||||
get_dropped_messages.set(r, get_client_getter(ms, [](const shard_info& c) {
|
||||
// We don't have the same drop message mechanism
|
||||
// as origin has.
|
||||
@@ -159,7 +155,6 @@ void set_messaging_service(http_context& ctx, routes& r, sharded<netw::messaging
|
||||
void unset_messaging_service(http_context& ctx, routes& r) {
|
||||
get_timeout_messages.unset(r);
|
||||
get_sent_messages.unset(r);
|
||||
get_replied_messages.unset(r);
|
||||
get_dropped_messages.unset(r);
|
||||
get_exception_messages.unset(r);
|
||||
get_pending_messages.unset(r);
|
||||
|
||||
@@ -201,39 +201,29 @@ void set_storage_proxy(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
sp::get_hinted_handoff_enabled.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
const auto& filter = service::get_storage_proxy().local().get_hints_host_filter();
|
||||
return make_ready_future<json::json_return_type>(!filter.is_disabled_for_all());
|
||||
auto enabled = ctx.db.local().get_config().hinted_handoff_enabled();
|
||||
return make_ready_future<json::json_return_type>(enabled);
|
||||
});
|
||||
|
||||
sp::set_hinted_handoff_enabled.set(r, [](std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
auto enable = req->get_query_param("enable");
|
||||
auto filter = (enable == "true" || enable == "1")
|
||||
? db::hints::host_filter(db::hints::host_filter::enabled_for_all_tag {})
|
||||
: db::hints::host_filter(db::hints::host_filter::disabled_for_all_tag {});
|
||||
return service::get_storage_proxy().invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
|
||||
return sp.change_hints_host_filter(filter);
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
sp::get_hinted_handoff_enabled_by_dc.set(r, [](std::unique_ptr<request> req) {
|
||||
std::vector<sstring> res;
|
||||
const auto& filter = service::get_storage_proxy().local().get_hints_host_filter();
|
||||
const auto& dcs = filter.get_dcs();
|
||||
res.reserve(res.size());
|
||||
std::copy(dcs.begin(), dcs.end(), std::back_inserter(res));
|
||||
//TBD
|
||||
unimplemented();
|
||||
std::vector<sp::mapper_list> res;
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
sp::set_hinted_handoff_enabled_by_dc_list.set(r, [](std::unique_ptr<request> req) {
|
||||
auto dcs = req->get_query_param("dcs");
|
||||
auto filter = db::hints::host_filter::parse_from_dc_list(std::move(dcs));
|
||||
return service::get_storage_proxy().invoke_on_all([filter = std::move(filter)] (service::storage_proxy& sp) {
|
||||
return sp.change_hints_host_filter(filter);
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
//TBD
|
||||
unimplemented();
|
||||
auto enable = req->get_query_param("dcs");
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
sp::get_max_hint_window.set(r, [](std::unique_ptr<request> req) {
|
||||
|
||||
@@ -22,14 +22,10 @@
|
||||
#include "storage_service.hh"
|
||||
#include "api/api-doc/storage_service.json.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
#include "utils/hash.hh"
|
||||
#include <sstream>
|
||||
#include <optional>
|
||||
#include <time.h>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/filtered.hpp>
|
||||
#include <boost/algorithm/string/trim_all.hpp>
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include "service/storage_service.hh"
|
||||
#include "service/load_meter.hh"
|
||||
#include "db/commitlog/commitlog.hh"
|
||||
@@ -48,17 +44,9 @@
|
||||
#include "db/snapshot-ctl.hh"
|
||||
#include "transport/controller.hh"
|
||||
#include "thrift/controller.hh"
|
||||
#include "locator/token_metadata.hh"
|
||||
#include "cdc/generation_service.hh"
|
||||
|
||||
extern logging::logger apilog;
|
||||
|
||||
namespace api {
|
||||
|
||||
const locator::token_metadata& http_context::get_token_metadata() {
|
||||
return *shared_token_metadata.local().get();
|
||||
}
|
||||
|
||||
namespace ss = httpd::storage_service_json;
|
||||
using namespace json;
|
||||
|
||||
@@ -100,37 +88,6 @@ static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
|
||||
};
|
||||
}
|
||||
|
||||
seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, http_context &ctx, bool legacy_request) {
|
||||
namespace cf = httpd::column_family_json;
|
||||
return q.scatter().then([&q, legacy_request] {
|
||||
return sleep(q.duration()).then([&q, legacy_request] {
|
||||
return q.gather(q.capacity()).then([&q, legacy_request] (auto topk_results) {
|
||||
apilog.debug("toppartitions query: processing results");
|
||||
cf::toppartitions_query_results results;
|
||||
|
||||
results.read_cardinality = topk_results.read.size();
|
||||
results.write_cardinality = topk_results.write.size();
|
||||
|
||||
for (auto& d: topk_results.read.top(q.list_size())) {
|
||||
cf::toppartitions_record r;
|
||||
r.partition = (legacy_request ? "" : "(" + d.item.schema->ks_name() + ":" + d.item.schema->cf_name() + ") ") + sstring(d.item);
|
||||
r.count = d.count;
|
||||
r.error = d.error;
|
||||
results.read.push(r);
|
||||
}
|
||||
for (auto& d: topk_results.write.top(q.list_size())) {
|
||||
cf::toppartitions_record r;
|
||||
r.partition = (legacy_request ? "" : "(" + d.item.schema->ks_name() + ":" + d.item.schema->cf_name() + ") ") + sstring(d.item);
|
||||
r.count = d.count;
|
||||
r.error = d.error;
|
||||
results.write.push(r);
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(results);
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<json::json_return_type> set_tables_autocompaction(http_context& ctx, const sstring &keyspace, std::vector<sstring> tables, bool enabled) {
|
||||
if (tables.empty()) {
|
||||
tables = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
|
||||
@@ -196,7 +153,7 @@ void unset_rpc_controller(http_context& ctx, routes& r) {
|
||||
void set_repair(http_context& ctx, routes& r, sharded<netw::messaging_service>& ms) {
|
||||
ss::repair_async.set(r, [&ctx, &ms](std::unique_ptr<request> req) {
|
||||
static std::vector<sstring> options = {"primaryRange", "parallelism", "incremental",
|
||||
"jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "ignore_nodes", "trace",
|
||||
"jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "trace",
|
||||
"startToken", "endToken" };
|
||||
std::unordered_map<sstring, sstring> options_map;
|
||||
for (auto o : options) {
|
||||
@@ -262,7 +219,7 @@ void set_repair(http_context& ctx, routes& r, sharded<netw::messaging_service>&
|
||||
try {
|
||||
res = fut.get0();
|
||||
} catch (std::exception& e) {
|
||||
return make_exception_future<json::json_return_type>(httpd::bad_param_exception(e.what()));
|
||||
return make_exception_future<json::json_return_type>(httpd::server_error_exception(e.what()));
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(json::json_return_type(res));
|
||||
});
|
||||
@@ -299,14 +256,14 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
ss::get_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().sorted_tokens(), [](const dht::token& i) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().sorted_tokens(), [](const dht::token& i) {
|
||||
return boost::lexical_cast<std::string>(i);
|
||||
}));
|
||||
});
|
||||
|
||||
ss::get_node_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
gms::inet_address addr(req->param["endpoint"]);
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.get_token_metadata().get_tokens(addr), [](const dht::token& i) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().get_tokens(addr), [](const dht::token& i) {
|
||||
return boost::lexical_cast<std::string>(i);
|
||||
}));
|
||||
});
|
||||
@@ -324,58 +281,8 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
}));
|
||||
});
|
||||
|
||||
ss::toppartitions_generic.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
bool filters_provided = false;
|
||||
|
||||
std::unordered_set<std::tuple<sstring, sstring>, utils::tuple_hash> table_filters {};
|
||||
if (req->query_parameters.contains("table_filters")) {
|
||||
filters_provided = true;
|
||||
auto filters = req->get_query_param("table_filters");
|
||||
std::stringstream ss { filters };
|
||||
std::string filter;
|
||||
while (!filters.empty() && ss.good()) {
|
||||
std::getline(ss, filter, ',');
|
||||
table_filters.emplace(parse_fully_qualified_cf_name(filter));
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_set<sstring> keyspace_filters {};
|
||||
if (req->query_parameters.contains("keyspace_filters")) {
|
||||
filters_provided = true;
|
||||
auto filters = req->get_query_param("keyspace_filters");
|
||||
std::stringstream ss { filters };
|
||||
std::string filter;
|
||||
while (!filters.empty() && ss.good()) {
|
||||
std::getline(ss, filter, ',');
|
||||
keyspace_filters.emplace(std::move(filter));
|
||||
}
|
||||
}
|
||||
|
||||
// when the query is empty return immediately
|
||||
if (filters_provided && table_filters.empty() && keyspace_filters.empty()) {
|
||||
apilog.debug("toppartitions query: processing results");
|
||||
httpd::column_family_json::toppartitions_query_results results;
|
||||
|
||||
results.read_cardinality = 0;
|
||||
results.write_cardinality = 0;
|
||||
|
||||
return make_ready_future<json::json_return_type>(results);
|
||||
}
|
||||
|
||||
api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
|
||||
api::req_param<unsigned> capacity(*req, "capacity", 256);
|
||||
api::req_param<unsigned> list_size(*req, "list_size", 10);
|
||||
|
||||
apilog.info("toppartitions query: #table_filters={} #keyspace_filters={} duration={} list_size={} capacity={}",
|
||||
!table_filters.empty() ? std::to_string(table_filters.size()) : "all", !keyspace_filters.empty() ? std::to_string(keyspace_filters.size()) : "all", duration.param, list_size.param, capacity.param);
|
||||
|
||||
return seastar::do_with(db::toppartitions_query(ctx.db, std::move(table_filters), std::move(keyspace_filters), duration.value, list_size, capacity), [&ctx] (db::toppartitions_query& q) {
|
||||
return run_toppartitions_query(q, ctx);
|
||||
});
|
||||
});
|
||||
|
||||
ss::get_leaving_nodes.set(r, [&ctx](const_req req) {
|
||||
return container_to_vec(ctx.get_token_metadata().get_leaving_endpoints());
|
||||
return container_to_vec(ctx.token_metadata.local().get_leaving_endpoints());
|
||||
});
|
||||
|
||||
ss::get_moving_nodes.set(r, [](const_req req) {
|
||||
@@ -384,7 +291,7 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
ss::get_joining_nodes.set(r, [&ctx](const_req req) {
|
||||
auto points = ctx.get_token_metadata().get_bootstrap_tokens();
|
||||
auto points = ctx.token_metadata.local().get_bootstrap_tokens();
|
||||
std::unordered_set<sstring> addr;
|
||||
for (auto i: points) {
|
||||
addr.insert(boost::lexical_cast<std::string>(i.second));
|
||||
@@ -453,7 +360,7 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
|
||||
ss::get_host_id_map.set(r, [&ctx](const_req req) {
|
||||
std::vector<ss::mapper> res;
|
||||
return map_to_key_value(ctx.get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
|
||||
return map_to_key_value(ctx.token_metadata.local().get_endpoint_to_host_id_map_for_reading(), res);
|
||||
});
|
||||
|
||||
ss::get_load.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
@@ -487,7 +394,7 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
ss::cdc_streams_check_and_repair.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return service::get_local_storage_service().get_cdc_generation_service().check_and_repair_cdc_streams().then([] {
|
||||
return service::get_local_storage_service().check_and_repair_cdc_streams().then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
@@ -583,22 +490,7 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
|
||||
ss::remove_node.set(r, [](std::unique_ptr<request> req) {
|
||||
auto host_id = req->get_query_param("host_id");
|
||||
std::vector<sstring> ignore_nodes_strs= split(req->get_query_param("ignore_nodes"), ",");
|
||||
auto ignore_nodes = std::list<gms::inet_address>();
|
||||
for (std::string n : ignore_nodes_strs) {
|
||||
try {
|
||||
std::replace(n.begin(), n.end(), '\"', ' ');
|
||||
std::replace(n.begin(), n.end(), '\'', ' ');
|
||||
boost::trim_all(n);
|
||||
if (!n.empty()) {
|
||||
auto node = gms::inet_address(n);
|
||||
ignore_nodes.push_back(node);
|
||||
}
|
||||
} catch (...) {
|
||||
throw std::runtime_error(format("Failed to parse ignore_nodes parameter: ignore_nodes={}, node={}", ignore_nodes_strs, n));
|
||||
}
|
||||
}
|
||||
return service::get_local_storage_service().removenode(host_id, std::move(ignore_nodes)).then([] {
|
||||
return service::get_local_storage_service().removenode(host_id).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
@@ -818,19 +710,11 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
ss::load_new_ss_tables.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
auto ks = validate_keyspace(ctx, req->param);
|
||||
auto cf = req->get_query_param("cf");
|
||||
auto stream = req->get_query_param("load_and_stream");
|
||||
auto primary_replica = req->get_query_param("primary_replica_only");
|
||||
boost::algorithm::to_lower(stream);
|
||||
boost::algorithm::to_lower(primary_replica);
|
||||
bool load_and_stream = stream == "true" || stream == "1";
|
||||
bool primary_replica_only = primary_replica == "true" || primary_replica == "1";
|
||||
// No need to add the keyspace, since all we want is to avoid always sending this to the same
|
||||
// CPU. Even then I am being overzealous here. This is not something that happens all the time.
|
||||
auto coordinator = std::hash<sstring>()(cf) % smp::count;
|
||||
return service::get_storage_service().invoke_on(coordinator,
|
||||
[ks = std::move(ks), cf = std::move(cf),
|
||||
load_and_stream, primary_replica_only] (service::storage_service& s) {
|
||||
return s.load_new_sstables(ks, cf, load_and_stream, primary_replica_only);
|
||||
return service::get_storage_service().invoke_on(coordinator, [ks = std::move(ks), cf = std::move(cf)] (service::storage_service& s) {
|
||||
return s.load_new_sstables(ks, cf);
|
||||
}).then_wrapped([] (auto&& f) {
|
||||
if (f.failed()) {
|
||||
auto msg = fmt::format("Failed to load new sstables: {}", f.get_exception());
|
||||
@@ -848,12 +732,9 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
ss::reset_local_schema.set(r, [](std::unique_ptr<request> req) {
|
||||
// FIXME: We should truncate schema tables if more than one node in the cluster.
|
||||
auto& sp = service::get_storage_proxy();
|
||||
auto& fs = service::get_local_storage_service().features();
|
||||
return db::schema_tables::recalculate_schema_version(sp, fs).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
//TBD
|
||||
unimplemented();
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {
|
||||
@@ -886,7 +767,6 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
res.enable = tracing::tracing::get_local_tracing_instance().slow_query_tracing_enabled();
|
||||
res.ttl = tracing::tracing::get_local_tracing_instance().slow_query_record_ttl().count() ;
|
||||
res.threshold = tracing::tracing::get_local_tracing_instance().slow_query_threshold().count();
|
||||
res.fast = tracing::tracing::get_local_tracing_instance().ignore_trace_events_enabled();
|
||||
return res;
|
||||
});
|
||||
|
||||
@@ -894,9 +774,8 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
auto enable = req->get_query_param("enable");
|
||||
auto ttl = req->get_query_param("ttl");
|
||||
auto threshold = req->get_query_param("threshold");
|
||||
auto fast = req->get_query_param("fast");
|
||||
try {
|
||||
return tracing::tracing::tracing_instance().invoke_on_all([enable, ttl, threshold, fast] (auto& local_tracing) {
|
||||
return tracing::tracing::tracing_instance().invoke_on_all([enable, ttl, threshold] (auto& local_tracing) {
|
||||
if (threshold != "") {
|
||||
local_tracing.set_slow_query_threshold(std::chrono::microseconds(std::stol(threshold.c_str())));
|
||||
}
|
||||
@@ -906,9 +785,6 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
if (enable != "") {
|
||||
local_tracing.set_slow_query_enabled(strcasecmp(enable.c_str(), "true") == 0);
|
||||
}
|
||||
if (fast != "") {
|
||||
local_tracing.set_ignore_trace_events(strcasecmp(fast.c_str(), "true") == 0);
|
||||
}
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
@@ -1078,7 +954,7 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
tst.keyspace = schema->ks_name();
|
||||
tst.table = schema->cf_name();
|
||||
|
||||
for (auto sstables = t->get_sstables_including_compacted_undeleted(); auto sstable : *sstables) {
|
||||
for (auto sstable : *t->get_sstables_including_compacted_undeleted()) {
|
||||
auto ts = db_clock::to_time_t(sstable->data_file_write_time());
|
||||
::tm t;
|
||||
::gmtime_r(&ts, &t);
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include "api.hh"
|
||||
#include "db/data_listeners.hh"
|
||||
|
||||
namespace cql_transport { class controller; }
|
||||
class thrift_controller;
|
||||
@@ -41,6 +40,5 @@ void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl);
|
||||
void unset_rpc_controller(http_context& ctx, routes& r);
|
||||
void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_ctl);
|
||||
void unset_snapshot(http_context& ctx, routes& r);
|
||||
seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, http_context &ctx, bool legacy_request = false);
|
||||
|
||||
}
|
||||
|
||||
@@ -25,9 +25,6 @@
|
||||
#include <seastar/core/reactor.hh>
|
||||
#include <seastar/http/exception.hh>
|
||||
#include "log.hh"
|
||||
#include "database.hh"
|
||||
|
||||
extern logging::logger apilog;
|
||||
|
||||
namespace api {
|
||||
|
||||
@@ -73,16 +70,6 @@ void set_system(http_context& ctx, routes& r) {
|
||||
}
|
||||
return json::json_void();
|
||||
});
|
||||
|
||||
hs::drop_sstable_caches.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
apilog.info("Dropping sstable caches");
|
||||
return ctx.db.invoke_on_all([] (database& db) {
|
||||
return db.drop_caches();
|
||||
}).then([] {
|
||||
apilog.info("Caches dropped");
|
||||
return json::json_return_type(json::json_void());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
201
atomic_cell.cc
201
atomic_cell.cc
@@ -24,130 +24,142 @@
|
||||
#include "counters.hh"
|
||||
#include "types.hh"
|
||||
|
||||
/// LSA migrator for cells with irrelevant type
|
||||
///
|
||||
///
|
||||
const data::type_imr_descriptor& no_type_imr_descriptor() {
|
||||
static thread_local data::type_imr_descriptor state(data::type_info::make_variable_size());
|
||||
return state;
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_dead(api::timestamp_type timestamp, gc_clock::time_point deletion_time) {
|
||||
return atomic_cell_type::make_dead(timestamp, deletion_time);
|
||||
auto& imr_data = no_type_imr_descriptor();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_dead(timestamp, deletion_time), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, bytes_view value, atomic_cell::collection_member cm) {
|
||||
return atomic_cell_type::make_live(timestamp, single_fragment_range(value));
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, managed_bytes_view value, atomic_cell::collection_member cm) {
|
||||
return atomic_cell_type::make_live(timestamp, fragment_range(value));
|
||||
auto& imr_data = type.imr_state();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live(imr_data.type_info(), timestamp, value, bool(cm)), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, ser::buffer_view<bytes_ostream::fragment_iterator> value, atomic_cell::collection_member cm) {
|
||||
return atomic_cell_type::make_live(timestamp, value);
|
||||
auto& imr_data = type.imr_state();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live(imr_data.type_info(), timestamp, value, bool(cm)), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, const fragmented_temporary_buffer::view& value, collection_member cm)
|
||||
{
|
||||
return atomic_cell_type::make_live(timestamp, value);
|
||||
auto& imr_data = type.imr_state();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live(imr_data.type_info(), timestamp, value, bool(cm)), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, bytes_view value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, atomic_cell::collection_member cm) {
|
||||
return atomic_cell_type::make_live(timestamp, single_fragment_range(value), expiry, ttl);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, managed_bytes_view value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, atomic_cell::collection_member cm) {
|
||||
return atomic_cell_type::make_live(timestamp, fragment_range(value), expiry, ttl);
|
||||
auto& imr_data = type.imr_state();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live(imr_data.type_info(), timestamp, value, expiry, ttl, bool(cm)), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, ser::buffer_view<bytes_ostream::fragment_iterator> value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, atomic_cell::collection_member cm) {
|
||||
return atomic_cell_type::make_live(timestamp, value, expiry, ttl);
|
||||
auto& imr_data = type.imr_state();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live(imr_data.type_info(), timestamp, value, expiry, ttl, bool(cm)), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, const fragmented_temporary_buffer::view& value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, collection_member cm)
|
||||
{
|
||||
return atomic_cell_type::make_live(timestamp, value, expiry, ttl);
|
||||
auto& imr_data = type.imr_state();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live(imr_data.type_info(), timestamp, value, expiry, ttl, bool(cm)), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live_counter_update(api::timestamp_type timestamp, int64_t value) {
|
||||
return atomic_cell_type::make_live_counter_update(timestamp, value);
|
||||
auto& imr_data = no_type_imr_descriptor();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live_counter_update(timestamp, value), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live_uninitialized(const abstract_type& type, api::timestamp_type timestamp, size_t size) {
|
||||
return atomic_cell_type::make_live_uninitialized(timestamp, size);
|
||||
auto& imr_data = no_type_imr_descriptor();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live_uninitialized(imr_data.type_info(), timestamp, size), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
static imr::utils::object<data::cell::structure> copy_cell(const data::type_imr_descriptor& imr_data, const uint8_t* ptr)
|
||||
{
|
||||
using imr_object_type = imr::utils::object<data::cell::structure>;
|
||||
|
||||
// If the cell doesn't own any memory it is trivial and can be copied with
|
||||
// memcpy.
|
||||
auto f = data::cell::structure::get_member<data::cell::tags::flags>(ptr);
|
||||
if (!f.template get<data::cell::tags::external_data>()) {
|
||||
data::cell::context ctx(f, imr_data.type_info());
|
||||
// XXX: We may be better off storing the total cell size in memory. Measure!
|
||||
auto size = data::cell::structure::serialized_object_size(ptr, ctx);
|
||||
return imr_object_type::make_raw(size, [&] (uint8_t* dst) noexcept {
|
||||
std::copy_n(ptr, size, dst);
|
||||
}, &imr_data.lsa_migrator());
|
||||
}
|
||||
|
||||
return imr_object_type::make(data::cell::copy_fn(imr_data.type_info(), ptr), &imr_data.lsa_migrator());
|
||||
}
|
||||
|
||||
atomic_cell::atomic_cell(const abstract_type& type, atomic_cell_view other)
|
||||
: _data(other._view) {
|
||||
set_view(_data);
|
||||
}
|
||||
|
||||
// Based on:
|
||||
// - org.apache.cassandra.db.AbstractCell#reconcile()
|
||||
// - org.apache.cassandra.db.BufferExpiringCell#reconcile()
|
||||
// - org.apache.cassandra.db.BufferDeletedCell#reconcile()
|
||||
int
|
||||
compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right) {
|
||||
if (left.timestamp() != right.timestamp()) {
|
||||
return left.timestamp() > right.timestamp() ? 1 : -1;
|
||||
}
|
||||
if (left.is_live() != right.is_live()) {
|
||||
return left.is_live() ? -1 : 1;
|
||||
}
|
||||
if (left.is_live()) {
|
||||
auto c = compare_unsigned(left.value(), right.value());
|
||||
if (c != 0) {
|
||||
return c;
|
||||
}
|
||||
if (left.is_live_and_has_ttl() != right.is_live_and_has_ttl()) {
|
||||
// prefer expiring cells.
|
||||
return left.is_live_and_has_ttl() ? 1 : -1;
|
||||
}
|
||||
if (left.is_live_and_has_ttl()) {
|
||||
if (left.expiry() != right.expiry()) {
|
||||
return left.expiry() < right.expiry() ? -1 : 1;
|
||||
} else {
|
||||
// prefer the cell that was written later,
|
||||
// so it survives longer after it expires, until purged.
|
||||
if (left.ttl() != right.ttl()) {
|
||||
return left.ttl() < right.ttl() ? 1 : -1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Both are deleted
|
||||
if (left.deletion_time() != right.deletion_time()) {
|
||||
// Origin compares big-endian serialized deletion time. That's because it
|
||||
// delegates to AbstractCell.reconcile() which compares values after
|
||||
// comparing timestamps, which in case of deleted cells will hold
|
||||
// serialized expiry.
|
||||
return (uint64_t) left.deletion_time().time_since_epoch().count()
|
||||
< (uint64_t) right.deletion_time().time_since_epoch().count() ? -1 : 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
: atomic_cell(type.imr_state().type_info(),
|
||||
copy_cell(type.imr_state(), other._view.raw_pointer()))
|
||||
{ }
|
||||
|
||||
atomic_cell_or_collection atomic_cell_or_collection::copy(const abstract_type& type) const {
|
||||
if (_data.empty()) {
|
||||
if (!_data.get()) {
|
||||
return atomic_cell_or_collection();
|
||||
}
|
||||
return atomic_cell_or_collection(managed_bytes(_data));
|
||||
auto& imr_data = type.imr_state();
|
||||
return atomic_cell_or_collection(
|
||||
copy_cell(imr_data, _data.get())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell_or_collection::atomic_cell_or_collection(const abstract_type& type, atomic_cell_view acv)
|
||||
: _data(acv._view)
|
||||
: _data(copy_cell(type.imr_state(), acv._view.raw_pointer()))
|
||||
{
|
||||
}
|
||||
|
||||
bool atomic_cell_or_collection::equals(const abstract_type& type, const atomic_cell_or_collection& other) const
|
||||
{
|
||||
if (_data.empty() || other._data.empty()) {
|
||||
return _data.empty() && other._data.empty();
|
||||
auto ptr_a = _data.get();
|
||||
auto ptr_b = other._data.get();
|
||||
|
||||
if (!ptr_a || !ptr_b) {
|
||||
return !ptr_a && !ptr_b;
|
||||
}
|
||||
|
||||
if (type.is_atomic()) {
|
||||
auto a = atomic_cell_view::from_bytes(type, _data);
|
||||
auto b = atomic_cell_view::from_bytes(type, other._data);
|
||||
auto a = atomic_cell_view::from_bytes(type.imr_state().type_info(), _data);
|
||||
auto b = atomic_cell_view::from_bytes(type.imr_state().type_info(), other._data);
|
||||
if (a.timestamp() != b.timestamp()) {
|
||||
return false;
|
||||
}
|
||||
@@ -179,7 +191,28 @@ bool atomic_cell_or_collection::equals(const abstract_type& type, const atomic_c
|
||||
|
||||
size_t atomic_cell_or_collection::external_memory_usage(const abstract_type& t) const
|
||||
{
|
||||
return _data.external_memory_usage();
|
||||
if (!_data.get()) {
|
||||
return 0;
|
||||
}
|
||||
auto ctx = data::cell::context(_data.get(), t.imr_state().type_info());
|
||||
|
||||
auto view = data::cell::structure::make_view(_data.get(), ctx);
|
||||
auto flags = view.get<data::cell::tags::flags>();
|
||||
|
||||
size_t external_value_size = 0;
|
||||
if (flags.get<data::cell::tags::external_data>()) {
|
||||
if (flags.get<data::cell::tags::collection>()) {
|
||||
external_value_size = as_collection_mutation().data.size_bytes();
|
||||
} else {
|
||||
auto cell_view = data::cell::atomic_cell_view(t.imr_state().type_info(), view);
|
||||
external_value_size = cell_view.value_size();
|
||||
}
|
||||
// Add overhead of chunk headers. The last one is a special case.
|
||||
external_value_size += (external_value_size - 1) / data::cell::effective_external_chunk_length * data::cell::external_chunk_overhead;
|
||||
external_value_size += data::cell::external_last_chunk_overhead;
|
||||
}
|
||||
return data::cell::structure::serialized_object_size(_data.get(), ctx)
|
||||
+ imr_object_type::size_overhead + external_value_size;
|
||||
}
|
||||
|
||||
std::ostream&
|
||||
@@ -188,7 +221,7 @@ operator<<(std::ostream& os, const atomic_cell_view& acv) {
|
||||
return fmt_print(os, "atomic_cell{{{},ts={:d},expiry={:d},ttl={:d}}}",
|
||||
acv.is_counter_update()
|
||||
? "counter_update_value=" + to_sstring(acv.counter_update_value())
|
||||
: to_hex(to_bytes(acv.value())),
|
||||
: to_hex(acv.value().linearize()),
|
||||
acv.timestamp(),
|
||||
acv.is_live_and_has_ttl() ? acv.expiry().time_since_epoch().count() : -1,
|
||||
acv.is_live_and_has_ttl() ? acv.ttl().count() : 0);
|
||||
@@ -214,11 +247,12 @@ operator<<(std::ostream& os, const atomic_cell_view::printer& acvp) {
|
||||
cell_value_string_builder << "counter_update_value=" << acv.counter_update_value();
|
||||
} else {
|
||||
cell_value_string_builder << "shards: ";
|
||||
auto ccv = counter_cell_view(acv);
|
||||
cell_value_string_builder << ::join(", ", ccv.shards());
|
||||
counter_cell_view::with_linearized(acv, [&cell_value_string_builder] (counter_cell_view& ccv) {
|
||||
cell_value_string_builder << ::join(", ", ccv.shards());
|
||||
});
|
||||
}
|
||||
} else {
|
||||
cell_value_string_builder << type.to_string(to_bytes(acv.value()));
|
||||
cell_value_string_builder << type.to_string(acv.value().linearize());
|
||||
}
|
||||
return fmt_print(os, "atomic_cell{{{},ts={:d},expiry={:d},ttl={:d}}}",
|
||||
cell_value_string_builder.str(),
|
||||
@@ -237,11 +271,12 @@ operator<<(std::ostream& os, const atomic_cell::printer& acp) {
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const atomic_cell_or_collection::printer& p) {
|
||||
if (p._cell._data.empty()) {
|
||||
if (!p._cell._data.get()) {
|
||||
return os << "{ null atomic_cell_or_collection }";
|
||||
}
|
||||
using dc = data::cell;
|
||||
os << "{ ";
|
||||
if (p._cdef.type->is_multi_cell()) {
|
||||
if (dc::structure::get_member<dc::tags::flags>(p._cell._data.get()).get<dc::tags::collection>()) {
|
||||
os << "collection ";
|
||||
auto cmv = p._cell.as_collection_mutation();
|
||||
os << collection_mutation_view::printer(*p._cdef.type, cmv);
|
||||
|
||||
276
atomic_cell.hh
276
atomic_cell.hh
@@ -26,12 +26,12 @@
|
||||
#include "tombstone.hh"
|
||||
#include "gc_clock.hh"
|
||||
#include "utils/managed_bytes.hh"
|
||||
#include "utils/fragment_range.hh"
|
||||
#include <seastar/net//byteorder.hh>
|
||||
#include <seastar/util/bool_class.hh>
|
||||
#include <cstdint>
|
||||
#include <iosfwd>
|
||||
#include <concepts>
|
||||
#include "data/cell.hh"
|
||||
#include "data/schema_info.hh"
|
||||
#include "imr/utils.hh"
|
||||
#include "utils/fragmented_temporary_buffer.hh"
|
||||
|
||||
#include "serializer.hh"
|
||||
@@ -40,191 +40,41 @@ class abstract_type;
|
||||
class collection_type_impl;
|
||||
class atomic_cell_or_collection;
|
||||
|
||||
using atomic_cell_value = managed_bytes;
|
||||
template <mutable_view is_mutable>
|
||||
using atomic_cell_value_basic_view = managed_bytes_basic_view<is_mutable>;
|
||||
using atomic_cell_value_view = atomic_cell_value_basic_view<mutable_view::no>;
|
||||
using atomic_cell_value_mutable_view = atomic_cell_value_basic_view<mutable_view::yes>;
|
||||
|
||||
template <typename T>
|
||||
requires std::is_trivial_v<T>
|
||||
static void set_field(atomic_cell_value_mutable_view& out, unsigned offset, T val) {
|
||||
auto out_view = managed_bytes_mutable_view(out);
|
||||
out_view.remove_prefix(offset);
|
||||
write<T>(out_view, val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires std::is_trivial_v<T>
|
||||
static void set_field(atomic_cell_value& out, unsigned offset, T val) {
|
||||
auto out_view = atomic_cell_value_mutable_view(out);
|
||||
set_field(out_view, offset, val);
|
||||
}
|
||||
|
||||
template <FragmentRange Buffer>
|
||||
static void set_value(managed_bytes& b, unsigned value_offset, const Buffer& value) {
|
||||
auto v = managed_bytes_mutable_view(b).substr(value_offset, value.size_bytes());
|
||||
for (auto frag : value) {
|
||||
write_fragmented(v, single_fragmented_view(frag));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, FragmentedView Input>
|
||||
requires std::is_trivial_v<T>
|
||||
static T get_field(Input in, unsigned offset = 0) {
|
||||
in.remove_prefix(offset);
|
||||
return read_simple<T>(in);
|
||||
}
|
||||
|
||||
/*
|
||||
* Represents atomic cell layout. Works on serialized form.
|
||||
*
|
||||
* Layout:
|
||||
*
|
||||
* <live> := <int8_t:flags><int64_t:timestamp>(<int64_t:expiry><int32_t:ttl>)?<value>
|
||||
* <dead> := <int8_t: 0><int64_t:timestamp><int64_t:deletion_time>
|
||||
*/
|
||||
class atomic_cell_type final {
|
||||
private:
|
||||
static constexpr int8_t LIVE_FLAG = 0x01;
|
||||
static constexpr int8_t EXPIRY_FLAG = 0x02; // When present, expiry field is present. Set only for live cells
|
||||
static constexpr int8_t COUNTER_UPDATE_FLAG = 0x08; // Cell is a counter update.
|
||||
static constexpr unsigned flags_size = 1;
|
||||
static constexpr unsigned timestamp_offset = flags_size;
|
||||
static constexpr unsigned timestamp_size = 8;
|
||||
static constexpr unsigned expiry_offset = timestamp_offset + timestamp_size;
|
||||
static constexpr unsigned expiry_size = 8;
|
||||
static constexpr unsigned deletion_time_offset = timestamp_offset + timestamp_size;
|
||||
static constexpr unsigned deletion_time_size = 8;
|
||||
static constexpr unsigned ttl_offset = expiry_offset + expiry_size;
|
||||
static constexpr unsigned ttl_size = 4;
|
||||
friend class counter_cell_builder;
|
||||
private:
|
||||
static bool is_counter_update(atomic_cell_value_view cell) {
|
||||
return cell.front() & COUNTER_UPDATE_FLAG;
|
||||
}
|
||||
static bool is_live(atomic_cell_value_view cell) {
|
||||
return cell.front() & LIVE_FLAG;
|
||||
}
|
||||
static bool is_live_and_has_ttl(atomic_cell_value_view cell) {
|
||||
return cell.front() & EXPIRY_FLAG;
|
||||
}
|
||||
static bool is_dead(atomic_cell_value_view cell) {
|
||||
return !is_live(cell);
|
||||
}
|
||||
// Can be called on live and dead cells
|
||||
static api::timestamp_type timestamp(atomic_cell_value_view cell) {
|
||||
return get_field<api::timestamp_type>(cell, timestamp_offset);
|
||||
}
|
||||
static void set_timestamp(atomic_cell_value_mutable_view& cell, api::timestamp_type ts) {
|
||||
set_field(cell, timestamp_offset, ts);
|
||||
}
|
||||
// Can be called on live cells only
|
||||
private:
|
||||
template <mutable_view is_mutable>
|
||||
static managed_bytes_basic_view<is_mutable> do_get_value(managed_bytes_basic_view<is_mutable> cell) {
|
||||
auto expiry_field_size = bool(cell.front() & EXPIRY_FLAG) * (expiry_size + ttl_size);
|
||||
auto value_offset = flags_size + timestamp_size + expiry_field_size;
|
||||
cell.remove_prefix(value_offset);
|
||||
return cell;
|
||||
}
|
||||
public:
|
||||
static atomic_cell_value_view value(managed_bytes_view cell) {
|
||||
return do_get_value(cell);
|
||||
}
|
||||
static atomic_cell_value_mutable_view value(managed_bytes_mutable_view cell) {
|
||||
return do_get_value(cell);
|
||||
}
|
||||
// Can be called on live counter update cells only
|
||||
static int64_t counter_update_value(atomic_cell_value_view cell) {
|
||||
return get_field<int64_t>(cell, flags_size + timestamp_size);
|
||||
}
|
||||
// Can be called only when is_dead() is true.
|
||||
static gc_clock::time_point deletion_time(atomic_cell_value_view cell) {
|
||||
assert(is_dead(cell));
|
||||
return gc_clock::time_point(gc_clock::duration(get_field<int64_t>(cell, deletion_time_offset)));
|
||||
}
|
||||
// Can be called only when is_live_and_has_ttl() is true.
|
||||
static gc_clock::time_point expiry(atomic_cell_value_view cell) {
|
||||
assert(is_live_and_has_ttl(cell));
|
||||
auto expiry = get_field<int64_t>(cell, expiry_offset);
|
||||
return gc_clock::time_point(gc_clock::duration(expiry));
|
||||
}
|
||||
// Can be called only when is_live_and_has_ttl() is true.
|
||||
static gc_clock::duration ttl(atomic_cell_value_view cell) {
|
||||
assert(is_live_and_has_ttl(cell));
|
||||
return gc_clock::duration(get_field<int32_t>(cell, ttl_offset));
|
||||
}
|
||||
static managed_bytes make_dead(api::timestamp_type timestamp, gc_clock::time_point deletion_time) {
|
||||
managed_bytes b(managed_bytes::initialized_later(), flags_size + timestamp_size + deletion_time_size);
|
||||
b[0] = 0;
|
||||
set_field(b, timestamp_offset, timestamp);
|
||||
set_field(b, deletion_time_offset, static_cast<int64_t>(deletion_time.time_since_epoch().count()));
|
||||
return b;
|
||||
}
|
||||
template <FragmentRange Buffer>
|
||||
static managed_bytes make_live(api::timestamp_type timestamp, const Buffer& value) {
|
||||
auto value_offset = flags_size + timestamp_size;
|
||||
managed_bytes b(managed_bytes::initialized_later(), value_offset + value.size_bytes());
|
||||
b[0] = LIVE_FLAG;
|
||||
set_field(b, timestamp_offset, timestamp);
|
||||
set_value(b, value_offset, value);
|
||||
return b;
|
||||
}
|
||||
static managed_bytes make_live_counter_update(api::timestamp_type timestamp, int64_t value) {
|
||||
auto value_offset = flags_size + timestamp_size;
|
||||
managed_bytes b(managed_bytes::initialized_later(), value_offset + sizeof(value));
|
||||
b[0] = LIVE_FLAG | COUNTER_UPDATE_FLAG;
|
||||
set_field(b, timestamp_offset, timestamp);
|
||||
set_field(b, value_offset, value);
|
||||
return b;
|
||||
}
|
||||
template <FragmentRange Buffer>
|
||||
static managed_bytes make_live(api::timestamp_type timestamp, const Buffer& value, gc_clock::time_point expiry, gc_clock::duration ttl) {
|
||||
auto value_offset = flags_size + timestamp_size + expiry_size + ttl_size;
|
||||
managed_bytes b(managed_bytes::initialized_later(), value_offset + value.size_bytes());
|
||||
b[0] = EXPIRY_FLAG | LIVE_FLAG;
|
||||
set_field(b, timestamp_offset, timestamp);
|
||||
set_field(b, expiry_offset, static_cast<int64_t>(expiry.time_since_epoch().count()));
|
||||
set_field(b, ttl_offset, static_cast<int32_t>(ttl.count()));
|
||||
set_value(b, value_offset, value);
|
||||
return b;
|
||||
}
|
||||
static managed_bytes make_live_uninitialized(api::timestamp_type timestamp, size_t size) {
|
||||
auto value_offset = flags_size + timestamp_size;
|
||||
managed_bytes b(managed_bytes::initialized_later(), value_offset + size);
|
||||
b[0] = LIVE_FLAG;
|
||||
set_field(b, timestamp_offset, timestamp);
|
||||
return b;
|
||||
}
|
||||
template <mutable_view is_mutable>
|
||||
friend class basic_atomic_cell_view;
|
||||
friend class atomic_cell;
|
||||
};
|
||||
using atomic_cell_value_view = data::value_view;
|
||||
using atomic_cell_value_mutable_view = data::value_mutable_view;
|
||||
|
||||
/// View of an atomic cell
|
||||
template<mutable_view is_mutable>
|
||||
class basic_atomic_cell_view {
|
||||
protected:
|
||||
managed_bytes_basic_view<is_mutable> _view;
|
||||
friend class atomic_cell;
|
||||
data::cell::basic_atomic_cell_view<is_mutable> _view;
|
||||
friend class atomic_cell;
|
||||
public:
|
||||
using pointer_type = std::conditional_t<is_mutable == mutable_view::no, const uint8_t*, uint8_t*>;
|
||||
protected:
|
||||
void set_view(managed_bytes_basic_view<is_mutable> v) {
|
||||
_view = v;
|
||||
}
|
||||
basic_atomic_cell_view() = default;
|
||||
explicit basic_atomic_cell_view(managed_bytes_basic_view<is_mutable> v) : _view(std::move(v)) { }
|
||||
explicit basic_atomic_cell_view(data::cell::basic_atomic_cell_view<is_mutable> v)
|
||||
: _view(std::move(v)) { }
|
||||
|
||||
basic_atomic_cell_view(const data::type_info& ti, pointer_type ptr)
|
||||
: _view(data::cell::make_atomic_cell_view(ti, ptr))
|
||||
{ }
|
||||
|
||||
friend class atomic_cell_or_collection;
|
||||
public:
|
||||
operator basic_atomic_cell_view<mutable_view::no>() const noexcept {
|
||||
return basic_atomic_cell_view<mutable_view::no>(_view);
|
||||
}
|
||||
|
||||
void swap(basic_atomic_cell_view& other) noexcept {
|
||||
using std::swap;
|
||||
swap(_view, other._view);
|
||||
}
|
||||
|
||||
bool is_counter_update() const {
|
||||
return atomic_cell_type::is_counter_update(_view);
|
||||
return _view.is_counter_update();
|
||||
}
|
||||
bool is_live() const {
|
||||
return atomic_cell_type::is_live(_view);
|
||||
return _view.is_live();
|
||||
}
|
||||
bool is_live(tombstone t, bool is_counter) const {
|
||||
return is_live() && !is_covered_by(t, is_counter);
|
||||
@@ -233,72 +83,73 @@ public:
|
||||
return is_live() && !is_covered_by(t, is_counter) && !has_expired(now);
|
||||
}
|
||||
bool is_live_and_has_ttl() const {
|
||||
return atomic_cell_type::is_live_and_has_ttl(_view);
|
||||
return _view.is_expiring();
|
||||
}
|
||||
bool is_dead(gc_clock::time_point now) const {
|
||||
return atomic_cell_type::is_dead(_view) || has_expired(now);
|
||||
return !is_live() || has_expired(now);
|
||||
}
|
||||
bool is_covered_by(tombstone t, bool is_counter) const {
|
||||
return timestamp() <= t.timestamp || (is_counter && t.timestamp != api::missing_timestamp);
|
||||
}
|
||||
// Can be called on live and dead cells
|
||||
api::timestamp_type timestamp() const {
|
||||
return atomic_cell_type::timestamp(_view);
|
||||
return _view.timestamp();
|
||||
}
|
||||
void set_timestamp(api::timestamp_type ts) {
|
||||
atomic_cell_type::set_timestamp(_view, ts);
|
||||
_view.set_timestamp(ts);
|
||||
}
|
||||
// Can be called on live cells only
|
||||
atomic_cell_value_basic_view<is_mutable> value() const {
|
||||
return atomic_cell_type::value(_view);
|
||||
data::basic_value_view<is_mutable> value() const {
|
||||
return _view.value();
|
||||
}
|
||||
// Can be called on live cells only
|
||||
size_t value_size() const {
|
||||
return atomic_cell_type::value(_view).size();
|
||||
return _view.value_size();
|
||||
}
|
||||
bool is_value_fragmented() const {
|
||||
return _view.is_fragmented();
|
||||
return _view.is_value_fragmented();
|
||||
}
|
||||
// Can be called on live counter update cells only
|
||||
int64_t counter_update_value() const {
|
||||
return atomic_cell_type::counter_update_value(_view);
|
||||
return _view.counter_update_value();
|
||||
}
|
||||
// Can be called only when is_dead(gc_clock::time_point)
|
||||
gc_clock::time_point deletion_time() const {
|
||||
return !is_live() ? atomic_cell_type::deletion_time(_view) : expiry() - ttl();
|
||||
return !is_live() ? _view.deletion_time() : expiry() - ttl();
|
||||
}
|
||||
// Can be called only when is_live_and_has_ttl()
|
||||
gc_clock::time_point expiry() const {
|
||||
return atomic_cell_type::expiry(_view);
|
||||
return _view.expiry();
|
||||
}
|
||||
// Can be called only when is_live_and_has_ttl()
|
||||
gc_clock::duration ttl() const {
|
||||
return atomic_cell_type::ttl(_view);
|
||||
return _view.ttl();
|
||||
}
|
||||
// Can be called on live and dead cells
|
||||
bool has_expired(gc_clock::time_point now) const {
|
||||
return is_live_and_has_ttl() && expiry() <= now;
|
||||
}
|
||||
|
||||
managed_bytes_view serialize() const {
|
||||
return _view;
|
||||
bytes_view serialize() const {
|
||||
return _view.serialize();
|
||||
}
|
||||
};
|
||||
|
||||
class atomic_cell_view final : public basic_atomic_cell_view<mutable_view::no> {
|
||||
atomic_cell_view(managed_bytes_view v)
|
||||
: basic_atomic_cell_view(v) {}
|
||||
atomic_cell_view(const data::type_info& ti, const uint8_t* data)
|
||||
: basic_atomic_cell_view<mutable_view::no>(ti, data) {}
|
||||
|
||||
template<mutable_view is_mutable>
|
||||
atomic_cell_view(basic_atomic_cell_view<is_mutable> view)
|
||||
: basic_atomic_cell_view<mutable_view::no>(view) {}
|
||||
atomic_cell_view(data::cell::basic_atomic_cell_view<is_mutable> view)
|
||||
: basic_atomic_cell_view<mutable_view::no>(view) { }
|
||||
friend class atomic_cell;
|
||||
public:
|
||||
static atomic_cell_view from_bytes(const abstract_type& t, managed_bytes_view v) {
|
||||
return atomic_cell_view(v);
|
||||
static atomic_cell_view from_bytes(const data::type_info& ti, const imr::utils::object<data::cell::structure>& data) {
|
||||
return atomic_cell_view(ti, data.get());
|
||||
}
|
||||
static atomic_cell_view from_bytes(const abstract_type& t, bytes_view v) {
|
||||
return atomic_cell_view(managed_bytes_view(v));
|
||||
|
||||
static atomic_cell_view from_bytes(const data::type_info& ti, bytes_view bv) {
|
||||
return atomic_cell_view(ti, reinterpret_cast<const uint8_t*>(bv.begin()));
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const atomic_cell_view& acv);
|
||||
@@ -313,11 +164,11 @@ public:
|
||||
};
|
||||
|
||||
class atomic_cell_mutable_view final : public basic_atomic_cell_view<mutable_view::yes> {
|
||||
atomic_cell_mutable_view(managed_bytes_mutable_view data)
|
||||
: basic_atomic_cell_view(data) {}
|
||||
atomic_cell_mutable_view(const data::type_info& ti, uint8_t* data)
|
||||
: basic_atomic_cell_view<mutable_view::yes>(ti, data) {}
|
||||
public:
|
||||
static atomic_cell_mutable_view from_bytes(const abstract_type& t, managed_bytes_mutable_view v) {
|
||||
return atomic_cell_mutable_view(v);
|
||||
static atomic_cell_mutable_view from_bytes(const data::type_info& ti, imr::utils::object<data::cell::structure>& data) {
|
||||
return atomic_cell_mutable_view(ti, data.get());
|
||||
}
|
||||
|
||||
friend class atomic_cell;
|
||||
@@ -326,31 +177,26 @@ public:
|
||||
using atomic_cell_ref = atomic_cell_mutable_view;
|
||||
|
||||
class atomic_cell final : public basic_atomic_cell_view<mutable_view::yes> {
|
||||
managed_bytes _data;
|
||||
atomic_cell(managed_bytes b) : _data(std::move(b)) {
|
||||
set_view(_data);
|
||||
}
|
||||
|
||||
using imr_object_type = imr::utils::object<data::cell::structure>;
|
||||
imr_object_type _data;
|
||||
atomic_cell(const data::type_info& ti, imr::utils::object<data::cell::structure>&& data)
|
||||
: basic_atomic_cell_view<mutable_view::yes>(ti, data.get()), _data(std::move(data)) {}
|
||||
public:
|
||||
class collection_member_tag;
|
||||
using collection_member = bool_class<collection_member_tag>;
|
||||
|
||||
atomic_cell(atomic_cell&& o) noexcept : _data(std::move(o._data)) {
|
||||
set_view(_data);
|
||||
}
|
||||
atomic_cell(atomic_cell&&) = default;
|
||||
atomic_cell& operator=(const atomic_cell&) = delete;
|
||||
atomic_cell& operator=(atomic_cell&& o) {
|
||||
_data = std::move(o._data);
|
||||
set_view(_data);
|
||||
return *this;
|
||||
atomic_cell& operator=(atomic_cell&&) = default;
|
||||
void swap(atomic_cell& other) noexcept {
|
||||
basic_atomic_cell_view<mutable_view::yes>::swap(other);
|
||||
_data.swap(other._data);
|
||||
}
|
||||
operator atomic_cell_view() const { return atomic_cell_view(managed_bytes_view(_data)); }
|
||||
operator atomic_cell_view() const { return atomic_cell_view(_view); }
|
||||
atomic_cell(const abstract_type& t, atomic_cell_view other);
|
||||
static atomic_cell make_dead(api::timestamp_type timestamp, gc_clock::time_point deletion_time);
|
||||
static atomic_cell make_live(const abstract_type& type, api::timestamp_type timestamp, bytes_view value,
|
||||
collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type& type, api::timestamp_type timestamp, managed_bytes_view value,
|
||||
collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type& type, api::timestamp_type timestamp, ser::buffer_view<bytes_ostream::fragment_iterator> value,
|
||||
collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type& type, api::timestamp_type timestamp, const fragmented_temporary_buffer::view& value,
|
||||
@@ -362,8 +208,6 @@ public:
|
||||
static atomic_cell make_live_counter_update(api::timestamp_type timestamp, int64_t value);
|
||||
static atomic_cell make_live(const abstract_type&, api::timestamp_type timestamp, bytes_view value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type&, api::timestamp_type timestamp, managed_bytes_view value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type&, api::timestamp_type timestamp, ser::buffer_view<bytes_ostream::fragment_iterator> value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type&, api::timestamp_type timestamp, const fragmented_temporary_buffer::view& value,
|
||||
|
||||
@@ -52,7 +52,9 @@ struct appending_hash<atomic_cell_view> {
|
||||
feed_hash(h, cell.timestamp());
|
||||
if (cell.is_live()) {
|
||||
if (cdef.is_counter()) {
|
||||
::feed_hash(h, counter_cell_view(cell));
|
||||
counter_cell_view::with_linearized(cell, [&] (counter_cell_view ccv) {
|
||||
::feed_hash(h, ccv);
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (cell.is_live_and_has_ttl()) {
|
||||
|
||||
@@ -26,14 +26,20 @@
|
||||
#include "schema.hh"
|
||||
#include "hashing.hh"
|
||||
|
||||
#include "imr/utils.hh"
|
||||
|
||||
// A variant type that can hold either an atomic_cell, or a serialized collection.
|
||||
// Which type is stored is determined by the schema.
|
||||
// Has an "empty" state.
|
||||
// Objects moved-from are left in an empty state.
|
||||
class atomic_cell_or_collection final {
|
||||
managed_bytes _data;
|
||||
// FIXME: This has made us lose small-buffer optimisation. Unfortunately,
|
||||
// due to the changed cell format it would be less effective now, anyway.
|
||||
// Measure the actual impact because any attempts to fix this will become
|
||||
// irrelevant once rows are converted to the IMR as well, so maybe we can
|
||||
// live with this like that.
|
||||
using imr_object_type = imr::utils::object<data::cell::structure>;
|
||||
imr_object_type _data;
|
||||
private:
|
||||
atomic_cell_or_collection(managed_bytes&& data) : _data(std::move(data)) {}
|
||||
atomic_cell_or_collection(imr::utils::object<data::cell::structure>&& data) : _data(std::move(data)) {}
|
||||
public:
|
||||
atomic_cell_or_collection() = default;
|
||||
atomic_cell_or_collection(atomic_cell_or_collection&&) = default;
|
||||
@@ -43,16 +49,20 @@ public:
|
||||
atomic_cell_or_collection(atomic_cell ac) : _data(std::move(ac._data)) {}
|
||||
atomic_cell_or_collection(const abstract_type& at, atomic_cell_view acv);
|
||||
static atomic_cell_or_collection from_atomic_cell(atomic_cell data) { return { std::move(data._data) }; }
|
||||
atomic_cell_view as_atomic_cell(const column_definition& cdef) const { return atomic_cell_view::from_bytes(*cdef.type, _data); }
|
||||
atomic_cell_mutable_view as_mutable_atomic_cell(const column_definition& cdef) { return atomic_cell_mutable_view::from_bytes(*cdef.type, _data); }
|
||||
atomic_cell_view as_atomic_cell(const column_definition& cdef) const { return atomic_cell_view::from_bytes(cdef.type->imr_state().type_info(), _data); }
|
||||
atomic_cell_ref as_atomic_cell_ref(const column_definition& cdef) { return atomic_cell_mutable_view::from_bytes(cdef.type->imr_state().type_info(), _data); }
|
||||
atomic_cell_mutable_view as_mutable_atomic_cell(const column_definition& cdef) { return atomic_cell_mutable_view::from_bytes(cdef.type->imr_state().type_info(), _data); }
|
||||
atomic_cell_or_collection(collection_mutation cm) : _data(std::move(cm._data)) { }
|
||||
atomic_cell_or_collection copy(const abstract_type&) const;
|
||||
explicit operator bool() const {
|
||||
return !_data.empty();
|
||||
return bool(_data);
|
||||
}
|
||||
static constexpr bool can_use_mutable_view() {
|
||||
return true;
|
||||
}
|
||||
void swap(atomic_cell_or_collection& other) noexcept {
|
||||
_data.swap(other._data);
|
||||
}
|
||||
static atomic_cell_or_collection from_collection_mutation(collection_mutation data) { return std::move(data._data); }
|
||||
collection_mutation_view as_collection_mutation() const;
|
||||
bytes_view serialize() const;
|
||||
@@ -72,3 +82,12 @@ public:
|
||||
};
|
||||
friend std::ostream& operator<<(std::ostream&, const printer&);
|
||||
};
|
||||
|
||||
namespace std {
|
||||
|
||||
inline void swap(atomic_cell_or_collection& a, atomic_cell_or_collection& b) noexcept
|
||||
{
|
||||
a.swap(b);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -82,7 +82,7 @@ static future<> create_metadata_table_if_missing_impl(
|
||||
b.set_uuid(uuid);
|
||||
schema_ptr table = b.build();
|
||||
return ignore_existing([&mm, table = std::move(table)] () {
|
||||
return mm.announce_new_column_family(table);
|
||||
return mm.announce_new_column_family(table, false);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -108,7 +108,7 @@ future<> wait_for_schema_agreement(::service::migration_manager& mm, const datab
|
||||
});
|
||||
}
|
||||
|
||||
::service::query_state& internal_distributed_query_state() noexcept {
|
||||
const timeout_config& internal_distributed_timeout_config() noexcept {
|
||||
#ifdef DEBUG
|
||||
// Give the much slower debug tests more headroom for completing auth queries.
|
||||
static const auto t = 30s;
|
||||
@@ -116,9 +116,7 @@ future<> wait_for_schema_agreement(::service::migration_manager& mm, const datab
|
||||
static const auto t = 5s;
|
||||
#endif
|
||||
static const timeout_config tc{t, t, t, t, t, t, t};
|
||||
static thread_local ::service::client_state cs(::service::client_state::internal_tag{}, tc);
|
||||
static thread_local ::service::query_state qs(cs, empty_service_permit());
|
||||
return qs;
|
||||
return tc;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
#include "log.hh"
|
||||
#include "seastarx.hh"
|
||||
#include "utils/exponential_backoff_retry.hh"
|
||||
#include "service/query_state.hh"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
@@ -88,6 +87,6 @@ future<> wait_for_schema_agreement(::service::migration_manager&, const database
|
||||
///
|
||||
/// Time-outs for internal, non-local CQL queries.
|
||||
///
|
||||
::service::query_state& internal_distributed_query_state() noexcept;
|
||||
const timeout_config& internal_distributed_timeout_config() noexcept;
|
||||
|
||||
}
|
||||
|
||||
@@ -103,6 +103,7 @@ future<bool> default_authorizer::any_granted() const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config,
|
||||
{},
|
||||
true).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return !results->empty();
|
||||
@@ -115,7 +116,8 @@ future<> default_authorizer::migrate_legacy_metadata() const {
|
||||
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||
return do_with(
|
||||
row.get_as<sstring>("username"),
|
||||
@@ -195,6 +197,7 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config,
|
||||
{*maybe_role.name, r.name()}).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
return permissions::NONE;
|
||||
@@ -223,7 +226,7 @@ default_authorizer::modify(
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{permissions::to_strings(set), sstring(role_name), resource.name()}).discard_result();
|
||||
});
|
||||
}
|
||||
@@ -248,7 +251,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{},
|
||||
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
std::vector<permission_details> all_details;
|
||||
@@ -275,7 +278,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name) const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).discard_result().handle_exception([role_name](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
@@ -295,6 +298,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config,
|
||||
{resource.name()}).then_wrapped([this, resource](future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||
try {
|
||||
auto res = f.get0();
|
||||
@@ -311,6 +315,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config,
|
||||
{r.get_as<sstring>(ROLE_NAME), resource.name()}).discard_result().handle_exception(
|
||||
[resource](auto ep) {
|
||||
try {
|
||||
|
||||
@@ -114,7 +114,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||
auto username = row.get_as<sstring>("username");
|
||||
auto salted_hash = row.get_as<sstring>(SALTED_HASH);
|
||||
@@ -122,7 +122,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
return _qp.execute_internal(
|
||||
update_row_query(),
|
||||
consistency_for_user(username),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{std::move(salted_hash), username}).discard_result();
|
||||
}).finally([results] {});
|
||||
}).then([] {
|
||||
@@ -139,7 +139,7 @@ future<> password_authenticator::create_default_if_missing() const {
|
||||
return _qp.execute_internal(
|
||||
update_row_query(),
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt), DEFAULT_USER_NAME}).then([](auto&&) {
|
||||
plogger.info("Created default superuser authentication record.");
|
||||
});
|
||||
@@ -236,7 +236,7 @@ future<authenticated_user> password_authenticator::authenticate(
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_user(username),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{username},
|
||||
true);
|
||||
}).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||
@@ -270,7 +270,7 @@ future<> password_authenticator::create(std::string_view role_name, const authen
|
||||
return _qp.execute_internal(
|
||||
update_row_query(),
|
||||
consistency_for_user(role_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
|
||||
}
|
||||
|
||||
@@ -287,7 +287,7 @@ future<> password_authenticator::alter(std::string_view role_name, const authent
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_user(role_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
|
||||
}
|
||||
|
||||
@@ -299,7 +299,7 @@ future<> password_authenticator::drop(std::string_view name) const {
|
||||
|
||||
return _qp.execute_internal(
|
||||
query, consistency_for_user(name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(name)}).discard_result();
|
||||
}
|
||||
|
||||
|
||||
@@ -68,13 +68,14 @@ future<bool> default_role_row_satisfies(
|
||||
return qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
true).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
return qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
true).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
@@ -99,7 +100,7 @@ future<bool> any_nondefault_role_row_satisfies(
|
||||
return qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -154,7 +154,7 @@ future<> service::create_keyspace_if_missing(::service::migration_manager& mm) c
|
||||
|
||||
// We use min_timestamp so that default keyspace metadata will loose with any manual adjustments.
|
||||
// See issue #2129.
|
||||
return mm.announce_new_keyspace(ksm, api::min_timestamp);
|
||||
return mm.announce_new_keyspace(ksm, api::min_timestamp, false);
|
||||
}
|
||||
|
||||
return make_ready_future<>();
|
||||
@@ -210,6 +210,7 @@ future<bool> service::has_existing_legacy_users() const {
|
||||
return _qp.execute_internal(
|
||||
default_user_query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
true).then([this](auto results) {
|
||||
if (!results->empty()) {
|
||||
@@ -219,6 +220,7 @@ future<bool> service::has_existing_legacy_users() const {
|
||||
return _qp.execute_internal(
|
||||
default_user_query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config,
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
true).then([this](auto results) {
|
||||
if (!results->empty()) {
|
||||
@@ -227,7 +229,8 @@ future<bool> service::has_existing_legacy_users() const {
|
||||
|
||||
return _qp.execute_internal(
|
||||
all_users_query,
|
||||
db::consistency_level::QUORUM).then([](auto results) {
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config).then([](auto results) {
|
||||
return make_ready_future<bool>(!results->empty());
|
||||
});
|
||||
});
|
||||
@@ -368,13 +371,10 @@ bool is_enforcing(const service& ser) {
|
||||
return enforcing_authorizer || enforcing_authenticator;
|
||||
}
|
||||
|
||||
bool is_protected(const service& ser, command_desc cmd) noexcept {
|
||||
if (cmd.type_ == command_desc::type::ALTER_WITH_OPTS) {
|
||||
return false; // Table attributes are OK to modify; see #7057.
|
||||
}
|
||||
return ser.underlying_role_manager().protected_resources().contains(cmd.resource)
|
||||
|| ser.underlying_authenticator().protected_resources().contains(cmd.resource)
|
||||
|| ser.underlying_authorizer().protected_resources().contains(cmd.resource);
|
||||
bool is_protected(const service& ser, const resource& r) noexcept {
|
||||
return ser.underlying_role_manager().protected_resources().contains(r)
|
||||
|| ser.underlying_authenticator().protected_resources().contains(r)
|
||||
|| ser.underlying_authorizer().protected_resources().contains(r);
|
||||
}
|
||||
|
||||
static void validate_authentication_options_are_supported(
|
||||
|
||||
@@ -181,21 +181,10 @@ future<permission_set> get_permissions(const service&, const authenticated_user&
|
||||
///
|
||||
bool is_enforcing(const service&);
|
||||
|
||||
/// A description of a CQL command from which auth::service can tell whether or not this command could endanger
|
||||
/// internal data on which auth::service depends.
|
||||
struct command_desc {
|
||||
auth::permission permission; ///< Nature of the command's alteration.
|
||||
const ::auth::resource& resource; ///< Resource impacted by this command.
|
||||
enum class type {
|
||||
ALTER_WITH_OPTS, ///< Command is ALTER ... WITH ...
|
||||
OTHER
|
||||
} type_ = type::OTHER;
|
||||
};
|
||||
|
||||
///
|
||||
/// Protected resources cannot be modified even if the performer has permissions to do so.
|
||||
///
|
||||
bool is_protected(const service&, command_desc) noexcept;
|
||||
bool is_protected(const service&, const resource&) noexcept;
|
||||
|
||||
///
|
||||
/// Create a role with optional authentication information.
|
||||
|
||||
@@ -86,7 +86,7 @@ static future<std::optional<record>> find_record(cql3::query_processor& qp, std:
|
||||
return qp.execute_internal(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)},
|
||||
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
@@ -165,7 +165,7 @@ future<> standard_role_manager::create_default_role_if_missing() const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{meta::DEFAULT_SUPERUSER_NAME}).then([](auto&&) {
|
||||
log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
|
||||
return make_ready_future<>();
|
||||
@@ -192,7 +192,7 @@ future<> standard_role_manager::migrate_legacy_metadata() const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||
role_config config;
|
||||
config.is_superuser = row.get_or<bool>("super", false);
|
||||
@@ -253,7 +253,7 @@ future<> standard_role_manager::create_or_replace(std::string_view role_name, co
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name), c.is_superuser, c.can_login},
|
||||
true).discard_result();
|
||||
}
|
||||
@@ -296,7 +296,7 @@ standard_role_manager::alter(std::string_view role_name, const role_config_updat
|
||||
build_column_assignments(u),
|
||||
meta::roles_table::role_col_name),
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).discard_result();
|
||||
});
|
||||
}
|
||||
@@ -315,7 +315,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
|
||||
return parallel_for_each(
|
||||
members->begin(),
|
||||
@@ -354,7 +354,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).discard_result();
|
||||
};
|
||||
|
||||
@@ -381,7 +381,7 @@ standard_role_manager::modify_membership(
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_role(grantee_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{role_set{sstring(role_name)}, sstring(grantee_name)}).discard_result();
|
||||
};
|
||||
|
||||
@@ -392,7 +392,7 @@ standard_role_manager::modify_membership(
|
||||
format("INSERT INTO {} (role, member) VALUES (?, ?)",
|
||||
meta::role_members_table::qualified_name),
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
||||
|
||||
case membership_change::remove:
|
||||
@@ -400,7 +400,7 @@ standard_role_manager::modify_membership(
|
||||
format("DELETE FROM {} WHERE role = ? AND member = ?",
|
||||
meta::role_members_table::qualified_name),
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
||||
}
|
||||
|
||||
@@ -503,7 +503,7 @@ future<role_set> standard_role_manager::query_all() const {
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_query_state()).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
role_set roles;
|
||||
|
||||
std::transform(
|
||||
|
||||
40
bytes.hh
40
bytes.hh
@@ -28,7 +28,6 @@
|
||||
#include <iosfwd>
|
||||
#include <functional>
|
||||
#include "utils/mutable_view.hh"
|
||||
#include <xxhash.h>
|
||||
|
||||
using bytes = basic_sstring<int8_t, uint32_t, 31, false>;
|
||||
using bytes_view = std::basic_string_view<int8_t>;
|
||||
@@ -36,10 +35,6 @@ using bytes_mutable_view = basic_mutable_view<bytes_view::value_type>;
|
||||
using bytes_opt = std::optional<bytes>;
|
||||
using sstring_view = std::string_view;
|
||||
|
||||
inline bytes to_bytes(bytes&& b) {
|
||||
return std::move(b);
|
||||
}
|
||||
|
||||
inline sstring_view to_sstring_view(bytes_view view) {
|
||||
return {reinterpret_cast<const char*>(view.data()), view.size()};
|
||||
}
|
||||
@@ -48,6 +43,17 @@ inline bytes_view to_bytes_view(sstring_view view) {
|
||||
return {reinterpret_cast<const int8_t*>(view.data()), view.size()};
|
||||
}
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<bytes_view> {
|
||||
size_t operator()(bytes_view v) const {
|
||||
return hash<sstring_view>()({reinterpret_cast<const char*>(v.begin()), v.size()});
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
struct fmt_hex {
|
||||
bytes_view& v;
|
||||
fmt_hex(bytes_view& v) noexcept : v(v) {}
|
||||
@@ -88,30 +94,6 @@ struct appending_hash<bytes_view> {
|
||||
}
|
||||
};
|
||||
|
||||
struct bytes_view_hasher : public hasher {
|
||||
XXH64_state_t _state;
|
||||
bytes_view_hasher(uint64_t seed = 0) noexcept {
|
||||
XXH64_reset(&_state, seed);
|
||||
}
|
||||
void update(const char* ptr, size_t length) noexcept {
|
||||
XXH64_update(&_state, ptr, length);
|
||||
}
|
||||
size_t finalize() {
|
||||
return static_cast<size_t>(XXH64_digest(&_state));
|
||||
}
|
||||
};
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<bytes_view> {
|
||||
size_t operator()(bytes_view v) const {
|
||||
bytes_view_hasher h;
|
||||
appending_hash<bytes_view>{}(h, v);
|
||||
return h.finalize();
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
inline int32_t compare_unsigned(bytes_view v1, bytes_view v2) {
|
||||
auto size = std::min(v1.size(), v2.size());
|
||||
if (size) {
|
||||
|
||||
@@ -24,10 +24,9 @@
|
||||
#include <boost/range/iterator_range.hpp>
|
||||
|
||||
#include "bytes.hh"
|
||||
#include <seastar/core/unaligned.hh>
|
||||
#include "hashing.hh"
|
||||
#include <seastar/core/simple-stream.hh>
|
||||
#include <concepts>
|
||||
|
||||
/**
|
||||
* Utility for writing data into a buffer when its final size is not known up front.
|
||||
*
|
||||
@@ -40,7 +39,7 @@ public:
|
||||
using size_type = bytes::size_type;
|
||||
using value_type = bytes::value_type;
|
||||
using fragment_type = bytes_view;
|
||||
static constexpr size_type max_chunk_size() { return max_alloc_size() - sizeof(chunk); }
|
||||
static constexpr size_type max_chunk_size() { return 128 * 1024; }
|
||||
private:
|
||||
static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
|
||||
struct chunk {
|
||||
@@ -60,21 +59,13 @@ private:
|
||||
void operator delete(void* ptr) { free(ptr); }
|
||||
};
|
||||
static constexpr size_type default_chunk_size{512};
|
||||
static constexpr size_type max_alloc_size() { return 128 * 1024; }
|
||||
private:
|
||||
std::unique_ptr<chunk> _begin;
|
||||
chunk* _current;
|
||||
size_type _size;
|
||||
size_type _initial_chunk_size = default_chunk_size;
|
||||
public:
|
||||
class fragment_iterator {
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = bytes_view;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = bytes_view*;
|
||||
using reference = bytes_view&;
|
||||
private:
|
||||
class fragment_iterator : public std::iterator<std::input_iterator_tag, bytes_view> {
|
||||
chunk* _current = nullptr;
|
||||
public:
|
||||
fragment_iterator() = default;
|
||||
@@ -134,15 +125,16 @@ private:
|
||||
return _current->size - _current->offset;
|
||||
}
|
||||
// Figure out next chunk size.
|
||||
// - must be enough for data_size + sizeof(chunk)
|
||||
// - must be enough for data_size
|
||||
// - must be at least _initial_chunk_size
|
||||
// - try to double each time to prevent too many allocations
|
||||
// - should not exceed max_alloc_size, unless data_size requires so
|
||||
// - do not exceed max_chunk_size
|
||||
size_type next_alloc_size(size_t data_size) const {
|
||||
auto next_size = _current
|
||||
? _current->size * 2
|
||||
: _initial_chunk_size;
|
||||
next_size = std::min(next_size, max_alloc_size());
|
||||
next_size = std::min(next_size, max_chunk_size());
|
||||
// FIXME: check for overflow?
|
||||
return std::max<size_type>(next_size, data_size + sizeof(chunk));
|
||||
}
|
||||
// Makes room for a contiguous region of given size.
|
||||
@@ -234,9 +226,9 @@ public:
|
||||
};
|
||||
|
||||
// Returns a place holder for a value to be written later.
|
||||
template <std::integral T>
|
||||
template <typename T>
|
||||
inline
|
||||
place_holder<T>
|
||||
std::enable_if_t<std::is_fundamental<T>::value, place_holder<T>>
|
||||
write_place_holder() {
|
||||
return place_holder<T>{alloc(sizeof(T))};
|
||||
}
|
||||
|
||||
@@ -102,7 +102,7 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
// Points to the underlying reader conforming to _schema,
|
||||
// either to *_underlying_holder or _read_context->underlying().underlying().
|
||||
flat_mutation_reader* _underlying = nullptr;
|
||||
flat_mutation_reader_opt _underlying_holder;
|
||||
std::optional<flat_mutation_reader> _underlying_holder;
|
||||
|
||||
future<> do_fill_buffer(db::timeout_clock::time_point);
|
||||
future<> ensure_underlying(db::timeout_clock::time_point);
|
||||
@@ -112,7 +112,6 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
void move_to_next_range();
|
||||
void move_to_range(query::clustering_row_ranges::const_iterator);
|
||||
void move_to_next_entry();
|
||||
void maybe_drop_last_entry() noexcept;
|
||||
void add_to_buffer(const partition_snapshot_row_cursor&);
|
||||
void add_clustering_row_to_buffer(mutation_fragment&&);
|
||||
void add_to_buffer(range_tombstone&&);
|
||||
@@ -123,7 +122,6 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
bool can_populate() const;
|
||||
// Marks the range between _last_row (exclusive) and _next_row (exclusive) as continuous,
|
||||
// provided that the underlying reader still matches the latest version of the partition.
|
||||
// Invalidates _last_row.
|
||||
void maybe_update_continuity();
|
||||
// Tries to ensure that the lower bound of the current population range exists.
|
||||
// Returns false if it failed and range cannot be populated.
|
||||
@@ -165,12 +163,11 @@ public:
|
||||
cache_flat_mutation_reader(const cache_flat_mutation_reader&) = delete;
|
||||
cache_flat_mutation_reader(cache_flat_mutation_reader&&) = delete;
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override;
|
||||
virtual future<> next_partition() override {
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_end_of_stream = true;
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point timeout) override {
|
||||
clear_buffer();
|
||||
@@ -267,9 +264,6 @@ future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_poin
|
||||
}
|
||||
_state = state::reading_from_underlying;
|
||||
_population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema);
|
||||
if (!_read_context->partition_exists()) {
|
||||
return read_from_underlying(timeout);
|
||||
}
|
||||
auto end = _next_row_in_range ? position_in_partition(_next_row.position())
|
||||
: position_in_partition(_upper_bound);
|
||||
return _underlying->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
|
||||
@@ -334,6 +328,7 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim
|
||||
}
|
||||
if (_next_row_in_range) {
|
||||
maybe_update_continuity();
|
||||
_last_row = _next_row;
|
||||
add_to_buffer(_next_row);
|
||||
try {
|
||||
move_to_next_entry();
|
||||
@@ -346,14 +341,14 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim
|
||||
if (no_clustering_row_between(*_schema, _upper_bound, _next_row.position())) {
|
||||
this->maybe_update_continuity();
|
||||
} else if (can_populate()) {
|
||||
rows_entry::tri_compare cmp(*_schema);
|
||||
rows_entry::compare less(*_schema);
|
||||
auto& rows = _snp->version()->partition().clustered_rows();
|
||||
if (query::is_single_row(*_schema, *_ck_ranges_curr)) {
|
||||
with_allocator(_snp->region().allocator(), [&] {
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
current_allocator().construct<rows_entry>(_ck_ranges_curr->start()->value()));
|
||||
// Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
|
||||
auto insert_result = rows.insert_before_hint(_next_row.get_iterator_in_latest_version(), *e, cmp);
|
||||
auto insert_result = rows.insert_check(_next_row.get_iterator_in_latest_version(), *e, less);
|
||||
auto inserted = insert_result.second;
|
||||
auto it = insert_result.first;
|
||||
if (inserted) {
|
||||
@@ -369,7 +364,7 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
current_allocator().construct<rows_entry>(*_schema, _upper_bound, is_dummy::yes, is_continuous::yes));
|
||||
// Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
|
||||
auto insert_result = rows.insert_before_hint(_next_row.get_iterator_in_latest_version(), *e, cmp);
|
||||
auto insert_result = rows.insert_check(_next_row.get_iterator_in_latest_version(), *e, less);
|
||||
auto inserted = insert_result.second;
|
||||
if (inserted) {
|
||||
clogger.trace("csm {}: inserted dummy at {}", fmt::ptr(this), _upper_bound);
|
||||
@@ -379,7 +374,6 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim
|
||||
clogger.trace("csm {}: mark {} as continuous", fmt::ptr(this), insert_result.first->position());
|
||||
insert_result.first->set_continuous(true);
|
||||
}
|
||||
maybe_drop_last_entry();
|
||||
});
|
||||
}
|
||||
} else {
|
||||
@@ -410,12 +404,12 @@ bool cache_flat_mutation_reader::ensure_population_lower_bound() {
|
||||
if (!_last_row.is_in_latest_version()) {
|
||||
with_allocator(_snp->region().allocator(), [&] {
|
||||
auto& rows = _snp->version()->partition().clustered_rows();
|
||||
rows_entry::tri_compare cmp(*_schema);
|
||||
rows_entry::compare less(*_schema);
|
||||
// FIXME: Avoid the copy by inserting an incomplete clustering row
|
||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||
current_allocator().construct<rows_entry>(*_schema, *_last_row));
|
||||
e->set_continuous(false);
|
||||
auto insert_result = rows.insert_before_hint(rows.end(), *e, cmp);
|
||||
auto insert_result = rows.insert_check(rows.end(), *e, less);
|
||||
auto inserted = insert_result.second;
|
||||
if (inserted) {
|
||||
clogger.trace("csm {}: inserted lower bound dummy at {}", fmt::ptr(this), e->position());
|
||||
@@ -433,7 +427,6 @@ void cache_flat_mutation_reader::maybe_update_continuity() {
|
||||
with_allocator(_snp->region().allocator(), [&] {
|
||||
rows_entry& e = _next_row.ensure_entry_in_latest().row;
|
||||
e.set_continuous(true);
|
||||
maybe_drop_last_entry();
|
||||
});
|
||||
} else {
|
||||
_read_context->cache().on_mispopulate();
|
||||
@@ -462,17 +455,17 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
||||
clogger.trace("csm {}: populate({})", fmt::ptr(this), clustering_row::printer(*_schema, cr));
|
||||
_lsa_manager.run_in_update_section_with_allocator([this, &cr] {
|
||||
mutation_partition& mp = _snp->version()->partition();
|
||||
rows_entry::tri_compare cmp(*_schema);
|
||||
rows_entry::compare less(*_schema);
|
||||
|
||||
if (_read_context->digest_requested()) {
|
||||
cr.cells().prepare_hash(*_schema, column_kind::regular_column);
|
||||
}
|
||||
auto new_entry = alloc_strategy_unique_ptr<rows_entry>(
|
||||
current_allocator().construct<rows_entry>(*_schema, cr.key(), cr.as_deletable_row()));
|
||||
current_allocator().construct<rows_entry>(*_schema, cr.key(), cr.tomb(), cr.marker(), cr.cells()));
|
||||
new_entry->set_continuous(false);
|
||||
auto it = _next_row.iterators_valid() ? _next_row.get_iterator_in_latest_version()
|
||||
: mp.clustered_rows().lower_bound(cr.key(), cmp);
|
||||
auto insert_result = mp.clustered_rows().insert_before_hint(it, *new_entry, cmp);
|
||||
: mp.clustered_rows().lower_bound(cr.key(), less);
|
||||
auto insert_result = mp.clustered_rows().insert_check(it, *new_entry, less);
|
||||
if (insert_result.second) {
|
||||
_snp->tracker()->insert(*new_entry);
|
||||
new_entry.release();
|
||||
@@ -515,7 +508,7 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
|
||||
// This guarantees that rts starts after any emitted clustering_row
|
||||
// and not before any emitted range tombstone.
|
||||
if (!less(_lower_bound, rts.position())) {
|
||||
rts.set_start(_lower_bound);
|
||||
rts.set_start(*_schema, _lower_bound);
|
||||
} else {
|
||||
_lower_bound = position_in_partition(rts.position());
|
||||
_lower_bound_changed = true;
|
||||
@@ -528,6 +521,7 @@ void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
|
||||
// We add the row to the buffer even when it's full.
|
||||
// This simplifies the code. For more info see #3139.
|
||||
if (_next_row_in_range) {
|
||||
_last_row = _next_row;
|
||||
add_to_buffer(_next_row);
|
||||
move_to_next_entry();
|
||||
} else {
|
||||
@@ -576,8 +570,8 @@ void cache_flat_mutation_reader::move_to_range(query::clustering_row_ranges::con
|
||||
clogger.trace("csm {}: insert dummy at {}", fmt::ptr(this), _lower_bound);
|
||||
auto it = with_allocator(_lsa_manager.region().allocator(), [&] {
|
||||
auto& rows = _snp->version()->partition().clustered_rows();
|
||||
auto new_entry = alloc_strategy_unique_ptr<rows_entry>(current_allocator().construct<rows_entry>(*_schema, _lower_bound, is_dummy::yes, is_continuous::no));
|
||||
return rows.insert_before(_next_row.get_iterator_in_latest_version(), std::move(new_entry));
|
||||
auto new_entry = current_allocator().construct<rows_entry>(*_schema, _lower_bound, is_dummy::yes, is_continuous::no);
|
||||
return rows.insert_before(_next_row.get_iterator_in_latest_version(), *new_entry);
|
||||
});
|
||||
_snp->tracker()->insert(*it);
|
||||
_last_row = partition_snapshot_row_weakref(*_snp, it, true);
|
||||
@@ -589,38 +583,6 @@ void cache_flat_mutation_reader::move_to_range(query::clustering_row_ranges::con
|
||||
}
|
||||
}
|
||||
|
||||
// Drops _last_row entry when possible without changing logical contents of the partition.
|
||||
// Call only when _last_row and _next_row are valid.
|
||||
// Calling after ensure_population_lower_bound() is ok.
|
||||
// _next_row must have a greater position than _last_row.
|
||||
// Invalidates references but keeps the _next_row valid.
|
||||
inline
|
||||
void cache_flat_mutation_reader::maybe_drop_last_entry() noexcept {
|
||||
// Drop dummy entry if it falls inside a continuous range.
|
||||
// This prevents unnecessary dummy entries from accumulating in cache and slowing down scans.
|
||||
//
|
||||
// Eviction can happen only from oldest versions to preserve the continuity non-overlapping rule
|
||||
// (See docs/design-notes/row_cache.md)
|
||||
//
|
||||
if (_last_row
|
||||
&& _last_row->dummy()
|
||||
&& _last_row->continuous()
|
||||
&& _snp->at_latest_version()
|
||||
&& _snp->at_oldest_version()) {
|
||||
|
||||
with_allocator(_snp->region().allocator(), [&] {
|
||||
_last_row->on_evicted(_read_context->cache()._tracker);
|
||||
});
|
||||
_last_row = nullptr;
|
||||
|
||||
// There could be iterators pointing to _last_row, invalidate them
|
||||
_snp->region().allocator().invalidate_references();
|
||||
|
||||
// Don't invalidate _next_row, move_to_next_entry() expects it to be still valid.
|
||||
_next_row.force_valid();
|
||||
}
|
||||
}
|
||||
|
||||
// _next_row must be inside the range.
|
||||
inline
|
||||
void cache_flat_mutation_reader::move_to_next_entry() {
|
||||
@@ -628,18 +590,14 @@ void cache_flat_mutation_reader::move_to_next_entry() {
|
||||
if (no_clustering_row_between(*_schema, _next_row.position(), _upper_bound)) {
|
||||
move_to_next_range();
|
||||
} else {
|
||||
auto new_last_row = partition_snapshot_row_weakref(_next_row);
|
||||
if (!_next_row.next()) {
|
||||
move_to_end();
|
||||
return;
|
||||
}
|
||||
_last_row = std::move(new_last_row);
|
||||
_next_row_in_range = !after_current_range(_next_row.position());
|
||||
clogger.trace("csm {}: next={}, cont={}, in_range={}", fmt::ptr(this), _next_row.position(), _next_row.continuous(), _next_row_in_range);
|
||||
if (!_next_row.continuous()) {
|
||||
start_reading_from_underlying();
|
||||
} else {
|
||||
maybe_drop_last_entry();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -660,13 +618,6 @@ void cache_flat_mutation_reader::add_to_buffer(const partition_snapshot_row_curs
|
||||
if (!row.dummy()) {
|
||||
_read_context->cache().on_row_hit();
|
||||
add_clustering_row_to_buffer(mutation_fragment(*_schema, _permit, row.row(_read_context->digest_requested())));
|
||||
} else {
|
||||
position_in_partition::less_compare less(*_schema);
|
||||
if (less(_lower_bound, row.position())) {
|
||||
_lower_bound = row.position();
|
||||
_lower_bound_changed = true;
|
||||
}
|
||||
_read_context->cache()._tracker.on_dummy_row_hit();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -693,7 +644,7 @@ void cache_flat_mutation_reader::add_to_buffer(range_tombstone&& rt) {
|
||||
return;
|
||||
}
|
||||
if (!less(_lower_bound, rt.position())) {
|
||||
rt.set_start(_lower_bound);
|
||||
rt.set_start(*_schema, _lower_bound);
|
||||
} else {
|
||||
_lower_bound = position_in_partition(rt.position());
|
||||
_lower_bound_changed = true;
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
#include "idl/mutation.dist.impl.hh"
|
||||
#include <iostream>
|
||||
|
||||
canonical_mutation::canonical_mutation(bytes_ostream data)
|
||||
canonical_mutation::canonical_mutation(bytes data)
|
||||
: _data(std::move(data))
|
||||
{ }
|
||||
|
||||
@@ -45,7 +45,8 @@ canonical_mutation::canonical_mutation(const mutation& m)
|
||||
{
|
||||
mutation_partition_serializer part_ser(*m.schema(), m.partition());
|
||||
|
||||
ser::writer_of_canonical_mutation<bytes_ostream> wr(_data);
|
||||
bytes_ostream out;
|
||||
ser::writer_of_canonical_mutation<bytes_ostream> wr(out);
|
||||
std::move(wr).write_table_id(m.schema()->id())
|
||||
.write_schema_version(m.schema()->version())
|
||||
.write_key(m.key())
|
||||
@@ -53,6 +54,7 @@ canonical_mutation::canonical_mutation(const mutation& m)
|
||||
.partition([&] (auto wr) {
|
||||
part_ser.write(std::move(wr));
|
||||
}).end_canonical_mutation();
|
||||
_data = to_bytes(out.linearize());
|
||||
}
|
||||
|
||||
utils::UUID canonical_mutation::column_family_id() const {
|
||||
|
||||
@@ -32,9 +32,9 @@
|
||||
// Safe to access from other shards via const&.
|
||||
// Safe to pass serialized across nodes.
|
||||
class canonical_mutation {
|
||||
bytes_ostream _data;
|
||||
bytes _data;
|
||||
public:
|
||||
explicit canonical_mutation(bytes_ostream);
|
||||
explicit canonical_mutation(bytes);
|
||||
explicit canonical_mutation(const mutation&);
|
||||
|
||||
canonical_mutation(canonical_mutation&&) = default;
|
||||
@@ -51,7 +51,7 @@ public:
|
||||
|
||||
utils::UUID column_family_id() const;
|
||||
|
||||
const bytes_ostream& representation() const { return _data; }
|
||||
const bytes& representation() const { return _data; }
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const canonical_mutation& cm);
|
||||
};
|
||||
|
||||
@@ -33,13 +33,9 @@ template<typename T>
|
||||
struct cartesian_product {
|
||||
const std::vector<std::vector<T>>& _vec_of_vecs;
|
||||
public:
|
||||
class iterator {
|
||||
class iterator : public std::iterator<std::forward_iterator_tag, std::vector<T>> {
|
||||
public:
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = std::vector<T>;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = std::vector<T>*;
|
||||
using reference = std::vector<T>&;
|
||||
private:
|
||||
size_t _pos;
|
||||
const std::vector<std::vector<T>>* _vec_of_vecs;
|
||||
|
||||
@@ -22,13 +22,11 @@
|
||||
#include <boost/type.hpp>
|
||||
#include <random>
|
||||
#include <unordered_set>
|
||||
#include <algorithm>
|
||||
#include <seastar/core/sleep.hh>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <algorithm>
|
||||
|
||||
#include "keys.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "database.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
@@ -39,8 +37,6 @@
|
||||
#include "gms/gossiper.hh"
|
||||
|
||||
#include "cdc/generation.hh"
|
||||
#include "cdc/cdc_options.hh"
|
||||
#include "cdc/generation_service.hh"
|
||||
|
||||
extern logging::logger cdc_log;
|
||||
|
||||
@@ -205,12 +201,12 @@ static std::vector<stream_id> create_stream_ids(
|
||||
class topology_description_generator final {
|
||||
const db::config& _cfg;
|
||||
const std::unordered_set<dht::token>& _bootstrap_tokens;
|
||||
const locator::token_metadata_ptr _tmptr;
|
||||
const locator::token_metadata& _token_metadata;
|
||||
const gms::gossiper& _gossiper;
|
||||
|
||||
// Compute a set of tokens that split the token ring into vnodes
|
||||
auto get_tokens() const {
|
||||
auto tokens = _tmptr->sorted_tokens();
|
||||
auto tokens = _token_metadata.sorted_tokens();
|
||||
auto it = tokens.insert(
|
||||
tokens.end(), _bootstrap_tokens.begin(), _bootstrap_tokens.end());
|
||||
std::sort(it, tokens.end());
|
||||
@@ -225,7 +221,7 @@ class topology_description_generator final {
|
||||
if (_bootstrap_tokens.contains(end)) {
|
||||
return {smp::count, _cfg.murmur3_partitioner_ignore_msb_bits()};
|
||||
} else {
|
||||
auto endpoint = _tmptr->get_endpoint(end);
|
||||
auto endpoint = _token_metadata.get_endpoint(end);
|
||||
if (!endpoint) {
|
||||
throw std::runtime_error(
|
||||
format("Can't find endpoint for token {}", end));
|
||||
@@ -250,11 +246,11 @@ public:
|
||||
topology_description_generator(
|
||||
const db::config& cfg,
|
||||
const std::unordered_set<dht::token>& bootstrap_tokens,
|
||||
const locator::token_metadata_ptr tmptr,
|
||||
const locator::token_metadata& token_metadata,
|
||||
const gms::gossiper& gossiper)
|
||||
: _cfg(cfg)
|
||||
, _bootstrap_tokens(bootstrap_tokens)
|
||||
, _tmptr(std::move(tmptr))
|
||||
, _token_metadata(token_metadata)
|
||||
, _gossiper(gossiper)
|
||||
{}
|
||||
|
||||
@@ -324,7 +320,7 @@ topology_description limit_number_of_streams_if_needed(topology_description&& de
|
||||
}
|
||||
|
||||
size_t limit = std::max(limit_of_streams_in_topology_description(), desc.entries().size());
|
||||
if (limit >= streams_count) {
|
||||
if (limit >= size_t(streams_count)) {
|
||||
return std::move(desc);
|
||||
}
|
||||
size_t streams_per_vnode_limit = limit / desc.entries().size();
|
||||
@@ -341,16 +337,17 @@ topology_description limit_number_of_streams_if_needed(topology_description&& de
|
||||
return topology_description(std::move(entries));
|
||||
}
|
||||
|
||||
future<db_clock::time_point> make_new_cdc_generation(
|
||||
// Run inside seastar::async context.
|
||||
db_clock::time_point make_new_cdc_generation(
|
||||
const db::config& cfg,
|
||||
const std::unordered_set<dht::token>& bootstrap_tokens,
|
||||
const locator::token_metadata_ptr tmptr,
|
||||
const locator::token_metadata& tm,
|
||||
const gms::gossiper& g,
|
||||
db::system_distributed_keyspace& sys_dist_ks,
|
||||
std::chrono::milliseconds ring_delay,
|
||||
bool add_delay) {
|
||||
bool for_testing) {
|
||||
using namespace std::chrono;
|
||||
auto gen = topology_description_generator(cfg, bootstrap_tokens, tmptr, g).generate();
|
||||
auto gen = topology_description_generator(cfg, bootstrap_tokens, tm, g).generate();
|
||||
|
||||
// If the cluster is large we may end up with a generation that contains
|
||||
// large number of streams. This is problematic because we store the
|
||||
@@ -366,11 +363,11 @@ future<db_clock::time_point> make_new_cdc_generation(
|
||||
|
||||
// Begin the race.
|
||||
auto ts = db_clock::now() + (
|
||||
(!add_delay || ring_delay == milliseconds(0)) ? milliseconds(0) : (
|
||||
(for_testing || ring_delay == milliseconds(0)) ? milliseconds(0) : (
|
||||
2 * ring_delay + duration_cast<milliseconds>(generation_leeway)));
|
||||
co_await sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tmptr->count_normal_token_owners() });
|
||||
sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tm.count_normal_token_owners() }).get();
|
||||
|
||||
co_return ts;
|
||||
return ts;
|
||||
}
|
||||
|
||||
std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_address& endpoint, const gms::gossiper& g) {
|
||||
@@ -379,581 +376,63 @@ std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_ad
|
||||
return gms::versioned_value::cdc_streams_timestamp_from_string(streams_ts_string);
|
||||
}
|
||||
|
||||
static future<> do_update_streams_description(
|
||||
// Run inside seastar::async context.
|
||||
static void do_update_streams_description(
|
||||
db_clock::time_point streams_ts,
|
||||
db::system_distributed_keyspace& sys_dist_ks,
|
||||
db::system_distributed_keyspace::context ctx) {
|
||||
if (co_await sys_dist_ks.cdc_desc_exists(streams_ts, ctx)) {
|
||||
cdc_log.info("Generation {}: streams description table already updated.", streams_ts);
|
||||
co_return;
|
||||
if (sys_dist_ks.cdc_desc_exists(streams_ts, ctx).get0()) {
|
||||
cdc_log.debug("update_streams_description: description of generation {} already inserted", streams_ts);
|
||||
return;
|
||||
}
|
||||
|
||||
// We might race with another node also inserting the description, but that's ok. It's an idempotent operation.
|
||||
|
||||
auto topo = co_await sys_dist_ks.read_cdc_topology_description(streams_ts, ctx);
|
||||
auto topo = sys_dist_ks.read_cdc_topology_description(streams_ts, ctx).get0();
|
||||
if (!topo) {
|
||||
throw no_generation_data_exception(streams_ts);
|
||||
throw std::runtime_error(format("could not find streams data for timestamp {}", streams_ts));
|
||||
}
|
||||
|
||||
co_await sys_dist_ks.create_cdc_desc(streams_ts, *topo, ctx);
|
||||
std::set<cdc::stream_id> streams_set;
|
||||
for (auto& entry: topo->entries()) {
|
||||
streams_set.insert(entry.streams.begin(), entry.streams.end());
|
||||
}
|
||||
|
||||
std::vector<cdc::stream_id> streams_vec(streams_set.begin(), streams_set.end());
|
||||
|
||||
sys_dist_ks.create_cdc_desc(streams_ts, streams_vec, ctx).get();
|
||||
cdc_log.info("CDC description table successfully updated with generation {}.", streams_ts);
|
||||
}
|
||||
|
||||
future<> update_streams_description(
|
||||
void update_streams_description(
|
||||
db_clock::time_point streams_ts,
|
||||
shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source& abort_src) {
|
||||
try {
|
||||
co_await do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
|
||||
} catch (...) {
|
||||
do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
|
||||
} catch(...) {
|
||||
cdc_log.warn(
|
||||
"Could not update CDC description table with generation {}: {}. Will retry in the background.",
|
||||
streams_ts, std::current_exception());
|
||||
|
||||
// It is safe to discard this future: we keep system distributed keyspace alive.
|
||||
(void)(([] (db_clock::time_point streams_ts,
|
||||
shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source& abort_src) -> future<> {
|
||||
(void)seastar::async([
|
||||
streams_ts, sys_dist_ks, get_num_token_owners = std::move(get_num_token_owners), &abort_src
|
||||
] {
|
||||
while (true) {
|
||||
co_await sleep_abortable(std::chrono::seconds(60), abort_src);
|
||||
sleep_abortable(std::chrono::seconds(60), abort_src).get();
|
||||
try {
|
||||
co_await do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
|
||||
co_return;
|
||||
do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
|
||||
return;
|
||||
} catch (...) {
|
||||
cdc_log.warn(
|
||||
"Could not update CDC description table with generation {}: {}. Will try again.",
|
||||
streams_ts, std::current_exception());
|
||||
}
|
||||
}
|
||||
})(streams_ts, std::move(sys_dist_ks), std::move(get_num_token_owners), abort_src));
|
||||
}
|
||||
}
|
||||
|
||||
static db_clock::time_point as_timepoint(const utils::UUID& uuid) {
|
||||
return db_clock::time_point{std::chrono::milliseconds(utils::UUID_gen::get_adjusted_timestamp(uuid))};
|
||||
}
|
||||
|
||||
static future<std::vector<db_clock::time_point>> get_cdc_desc_v1_timestamps(
|
||||
db::system_distributed_keyspace& sys_dist_ks,
|
||||
abort_source& abort_src,
|
||||
const noncopyable_function<unsigned()>& get_num_token_owners) {
|
||||
while (true) {
|
||||
try {
|
||||
co_return co_await sys_dist_ks.get_cdc_desc_v1_timestamps({ get_num_token_owners() });
|
||||
} catch (...) {
|
||||
cdc_log.warn(
|
||||
"Failed to retrieve generation timestamps for rewriting: {}. Retrying in 60s.",
|
||||
std::current_exception());
|
||||
}
|
||||
co_await sleep_abortable(std::chrono::seconds(60), abort_src);
|
||||
}
|
||||
}
|
||||
|
||||
// Contains a CDC log table's creation time (extracted from its schema's id)
|
||||
// and its CDC TTL setting.
|
||||
struct time_and_ttl {
|
||||
db_clock::time_point creation_time;
|
||||
int ttl;
|
||||
};
|
||||
|
||||
/*
|
||||
* See `maybe_rewrite_streams_descriptions`.
|
||||
* This is the long-running-in-the-background part of that function.
|
||||
* It returns the timestamp of the last rewritten generation (if any).
|
||||
*/
|
||||
static future<std::optional<db_clock::time_point>> rewrite_streams_descriptions(
|
||||
std::vector<time_and_ttl> times_and_ttls,
|
||||
shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source& abort_src) {
|
||||
cdc_log.info("Retrieving generation timestamps for rewriting...");
|
||||
auto tss = co_await get_cdc_desc_v1_timestamps(*sys_dist_ks, abort_src, get_num_token_owners);
|
||||
cdc_log.info("Generation timestamps retrieved.");
|
||||
|
||||
// Find first generation timestamp such that some CDC log table may contain data before this timestamp.
|
||||
// This predicate is monotonic w.r.t the timestamps.
|
||||
auto now = db_clock::now();
|
||||
std::sort(tss.begin(), tss.end());
|
||||
auto first = std::partition_point(tss.begin(), tss.end(), [&] (db_clock::time_point ts) {
|
||||
// partition_point finds first element that does *not* satisfy the predicate.
|
||||
return std::none_of(times_and_ttls.begin(), times_and_ttls.end(),
|
||||
[&] (const time_and_ttl& tat) {
|
||||
// In this CDC log table there are no entries older than the table's creation time
|
||||
// or (now - the table's ttl). We subtract 10s to account for some possible clock drift.
|
||||
// If ttl is set to 0 then entries in this table never expire. In that case we look
|
||||
// only at the table's creation time.
|
||||
auto no_entries_older_than =
|
||||
(tat.ttl == 0 ? tat.creation_time : std::max(tat.creation_time, now - std::chrono::seconds(tat.ttl)))
|
||||
- std::chrono::seconds(10);
|
||||
return no_entries_older_than < ts;
|
||||
});
|
||||
});
|
||||
|
||||
// Find first generation timestamp such that some CDC log table may contain data in this generation.
|
||||
// This and all later generations need to be written to the new streams table.
|
||||
if (first != tss.begin()) {
|
||||
--first;
|
||||
}
|
||||
|
||||
if (first == tss.end()) {
|
||||
cdc_log.info("No generations to rewrite.");
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
cdc_log.info("First generation to rewrite: {}", *first);
|
||||
|
||||
bool each_success = true;
|
||||
co_await max_concurrent_for_each(first, tss.end(), 10, [&] (db_clock::time_point ts) -> future<> {
|
||||
while (true) {
|
||||
try {
|
||||
co_return co_await do_update_streams_description(ts, *sys_dist_ks, { get_num_token_owners() });
|
||||
} catch (const no_generation_data_exception& e) {
|
||||
cdc_log.error("Failed to rewrite streams for generation {}: {}. Giving up.", ts, e);
|
||||
each_success = false;
|
||||
co_return;
|
||||
} catch (...) {
|
||||
cdc_log.warn("Failed to rewrite streams for generation {}: {}. Retrying in 60s.", ts, std::current_exception());
|
||||
}
|
||||
co_await sleep_abortable(std::chrono::seconds(60), abort_src);
|
||||
}
|
||||
});
|
||||
|
||||
if (each_success) {
|
||||
cdc_log.info("Rewriting stream tables finished successfully.");
|
||||
} else {
|
||||
cdc_log.info("Rewriting stream tables finished, but some generations could not be rewritten (check the logs).");
|
||||
}
|
||||
|
||||
if (first != tss.end()) {
|
||||
co_return *std::prev(tss.end());
|
||||
}
|
||||
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
future<> maybe_rewrite_streams_descriptions(
|
||||
const database& db,
|
||||
shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source& abort_src) {
|
||||
if (!db.has_schema(sys_dist_ks->NAME, sys_dist_ks->CDC_DESC_V1)) {
|
||||
// This cluster never went through a Scylla version which used this table
|
||||
// or the user deleted the table. Nothing to do.
|
||||
co_return;
|
||||
}
|
||||
|
||||
if (co_await db::system_keyspace::cdc_is_rewritten()) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
if (db.get_config().cdc_dont_rewrite_streams()) {
|
||||
cdc_log.warn("Stream rewriting disabled. Manual administrator intervention may be required...");
|
||||
co_return;
|
||||
}
|
||||
|
||||
// For each CDC log table get the TTL setting (from CDC options) and the table's creation time
|
||||
std::vector<time_and_ttl> times_and_ttls;
|
||||
for (auto& [_, cf] : db.get_column_families()) {
|
||||
auto& s = *cf->schema();
|
||||
auto base = cdc::get_base_table(db, s.ks_name(), s.cf_name());
|
||||
if (!base) {
|
||||
// Not a CDC log table.
|
||||
continue;
|
||||
}
|
||||
auto& cdc_opts = base->cdc_options();
|
||||
if (!cdc_opts.enabled()) {
|
||||
// This table is named like a CDC log table but it's not one.
|
||||
continue;
|
||||
}
|
||||
|
||||
times_and_ttls.push_back(time_and_ttl{as_timepoint(s.id()), cdc_opts.ttl()});
|
||||
}
|
||||
|
||||
if (times_and_ttls.empty()) {
|
||||
// There's no point in rewriting old generations' streams (they don't contain any data).
|
||||
cdc_log.info("No CDC log tables present, not rewriting stream tables.");
|
||||
co_return co_await db::system_keyspace::cdc_set_rewritten(std::nullopt);
|
||||
}
|
||||
|
||||
// It's safe to discard this future: the coroutine keeps system_distributed_keyspace alive
|
||||
// and the abort source's lifetime extends the lifetime of any other service.
|
||||
(void)(([_times_and_ttls = std::move(times_and_ttls), _sys_dist_ks = std::move(sys_dist_ks),
|
||||
_get_num_token_owners = std::move(get_num_token_owners), &_abort_src = abort_src] () mutable -> future<> {
|
||||
auto times_and_ttls = std::move(_times_and_ttls);
|
||||
auto sys_dist_ks = std::move(_sys_dist_ks);
|
||||
auto get_num_token_owners = std::move(_get_num_token_owners);
|
||||
auto& abort_src = _abort_src;
|
||||
|
||||
// This code is racing with node startup. At this point, we're most likely still waiting for gossip to settle
|
||||
// and some nodes that are UP may still be marked as DOWN by us.
|
||||
// Let's sleep a bit to increase the chance that the first attempt at rewriting succeeds (it's still ok if
|
||||
// it doesn't - we'll retry - but it's nice if we succeed without any warnings).
|
||||
co_await sleep_abortable(std::chrono::seconds(10), abort_src);
|
||||
|
||||
cdc_log.info("Rewriting stream tables in the background...");
|
||||
auto last_rewritten = co_await rewrite_streams_descriptions(
|
||||
std::move(times_and_ttls),
|
||||
std::move(sys_dist_ks),
|
||||
std::move(get_num_token_owners),
|
||||
abort_src);
|
||||
|
||||
co_await db::system_keyspace::cdc_set_rewritten(last_rewritten);
|
||||
})());
|
||||
}
|
||||
|
||||
static void assert_shard_zero(const sstring& where) {
|
||||
if (this_shard_id() != 0) {
|
||||
on_internal_error(cdc_log, format("`{}`: must be run on shard 0", where));
|
||||
}
|
||||
}
|
||||
|
||||
class and_reducer {
|
||||
private:
|
||||
bool _result = true;
|
||||
public:
|
||||
future<> operator()(bool value) {
|
||||
_result = value && _result;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
bool get() {
|
||||
return _result;
|
||||
}
|
||||
};
|
||||
|
||||
class or_reducer {
|
||||
private:
|
||||
bool _result = false;
|
||||
public:
|
||||
future<> operator()(bool value) {
|
||||
_result = value || _result;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
bool get() {
|
||||
return _result;
|
||||
}
|
||||
};
|
||||
|
||||
class generation_handling_nonfatal_exception : public std::runtime_error {
|
||||
using std::runtime_error::runtime_error;
|
||||
};
|
||||
|
||||
constexpr char could_not_retrieve_msg_template[]
|
||||
= "Could not retrieve CDC streams with timestamp {} upon gossip event. Reason: \"{}\". Action: {}.";
|
||||
|
||||
generation_service::generation_service(
|
||||
const db::config& cfg, gms::gossiper& g, sharded<db::system_distributed_keyspace>& sys_dist_ks,
|
||||
abort_source& abort_src, const locator::shared_token_metadata& stm)
|
||||
: _cfg(cfg), _gossiper(g), _sys_dist_ks(sys_dist_ks), _abort_src(abort_src), _token_metadata(stm) {
|
||||
}
|
||||
|
||||
future<> generation_service::stop() {
|
||||
if (this_shard_id() == 0) {
|
||||
co_await _gossiper.unregister_(shared_from_this());
|
||||
}
|
||||
|
||||
_stopped = true;
|
||||
}
|
||||
|
||||
generation_service::~generation_service() {
|
||||
assert(_stopped);
|
||||
}
|
||||
|
||||
future<> generation_service::after_join(std::optional<db_clock::time_point>&& startup_gen_ts) {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
assert(db::system_keyspace::bootstrap_complete());
|
||||
|
||||
_gen_ts = std::move(startup_gen_ts);
|
||||
_gossiper.register_(shared_from_this());
|
||||
|
||||
_joined = true;
|
||||
|
||||
// Retrieve the latest CDC generation seen in gossip (if any).
|
||||
co_await scan_cdc_generations();
|
||||
}
|
||||
|
||||
void generation_service::on_join(gms::inet_address ep, gms::endpoint_state ep_state) {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
|
||||
auto val = ep_state.get_application_state_ptr(gms::application_state::CDC_STREAMS_TIMESTAMP);
|
||||
if (!val) {
|
||||
return;
|
||||
}
|
||||
|
||||
on_change(ep, gms::application_state::CDC_STREAMS_TIMESTAMP, *val);
|
||||
}
|
||||
|
||||
void generation_service::on_change(gms::inet_address ep, gms::application_state app_state, const gms::versioned_value& v) {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
|
||||
if (app_state != gms::application_state::CDC_STREAMS_TIMESTAMP) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto ts = gms::versioned_value::cdc_streams_timestamp_from_string(v.value);
|
||||
cdc_log.debug("Endpoint: {}, CDC generation timestamp change: {}", ep, ts);
|
||||
|
||||
handle_cdc_generation(ts).get();
|
||||
}
|
||||
|
||||
future<> generation_service::check_and_repair_cdc_streams() {
|
||||
if (!_joined) {
|
||||
throw std::runtime_error("check_and_repair_cdc_streams: node not initialized yet");
|
||||
}
|
||||
|
||||
auto latest = _gen_ts;
|
||||
const auto& endpoint_states = _gossiper.get_endpoint_states();
|
||||
for (const auto& [addr, state] : endpoint_states) {
|
||||
if (!_gossiper.is_normal(addr)) {
|
||||
throw std::runtime_error(format("All nodes must be in NORMAL state while performing check_and_repair_cdc_streams"
|
||||
" ({} is in state {})", addr, _gossiper.get_gossip_status(state)));
|
||||
}
|
||||
|
||||
const auto ts = get_streams_timestamp_for(addr, _gossiper);
|
||||
if (!latest || (ts && *ts > *latest)) {
|
||||
latest = ts;
|
||||
}
|
||||
}
|
||||
|
||||
bool should_regenerate = false;
|
||||
std::optional<topology_description> gen;
|
||||
|
||||
static const auto timeout_msg = "Timeout while fetching CDC topology description";
|
||||
static const auto topology_read_error_note = "Note: this is likely caused by"
|
||||
" node(s) being down or unreachable. It is recommended to check the network and"
|
||||
" restart/remove the failed node(s), then retry checkAndRepairCdcStreams command";
|
||||
static const auto exception_translating_msg = "Translating the exception to `request_execution_exception`";
|
||||
const auto tmptr = _token_metadata.get();
|
||||
auto sys_dist_ks = get_sys_dist_ks();
|
||||
try {
|
||||
gen = co_await sys_dist_ks->read_cdc_topology_description(
|
||||
*latest, { tmptr->count_normal_token_owners() });
|
||||
} catch (exceptions::request_timeout_exception& e) {
|
||||
cdc_log.error("{}: \"{}\". {}.", timeout_msg, e.what(), exception_translating_msg);
|
||||
throw exceptions::request_execution_exception(exceptions::exception_code::READ_TIMEOUT,
|
||||
format("{}. {}.", timeout_msg, topology_read_error_note));
|
||||
} catch (exceptions::unavailable_exception& e) {
|
||||
static const auto unavailable_msg = "Node(s) unavailable while fetching CDC topology description";
|
||||
cdc_log.error("{}: \"{}\". {}.", unavailable_msg, e.what(), exception_translating_msg);
|
||||
throw exceptions::request_execution_exception(exceptions::exception_code::UNAVAILABLE,
|
||||
format("{}. {}.", unavailable_msg, topology_read_error_note));
|
||||
} catch (...) {
|
||||
const auto ep = std::current_exception();
|
||||
if (is_timeout_exception(ep)) {
|
||||
cdc_log.error("{}: \"{}\". {}.", timeout_msg, ep, exception_translating_msg);
|
||||
throw exceptions::request_execution_exception(exceptions::exception_code::READ_TIMEOUT,
|
||||
format("{}. {}.", timeout_msg, topology_read_error_note));
|
||||
}
|
||||
// On exotic errors proceed with regeneration
|
||||
cdc_log.error("Exception while reading CDC topology description: \"{}\". Regenerating streams anyway.", ep);
|
||||
should_regenerate = true;
|
||||
}
|
||||
|
||||
if (!gen) {
|
||||
cdc_log.error(
|
||||
"Could not find CDC generation with timestamp {} in distributed system tables (current time: {}),"
|
||||
" even though some node gossiped about it.",
|
||||
latest, db_clock::now());
|
||||
should_regenerate = true;
|
||||
} else {
|
||||
std::unordered_set<dht::token> gen_ends;
|
||||
for (const auto& entry : gen->entries()) {
|
||||
gen_ends.insert(entry.token_range_end);
|
||||
}
|
||||
for (const auto& metadata_token : tmptr->sorted_tokens()) {
|
||||
if (!gen_ends.contains(metadata_token)) {
|
||||
cdc_log.warn("CDC generation {} missing token {}. Regenerating.", latest, metadata_token);
|
||||
should_regenerate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!should_regenerate) {
|
||||
if (latest != _gen_ts) {
|
||||
co_await do_handle_cdc_generation(*latest);
|
||||
}
|
||||
cdc_log.info("CDC generation {} does not need repair", latest);
|
||||
co_return;
|
||||
}
|
||||
const auto new_gen_ts = co_await make_new_cdc_generation(_cfg,
|
||||
{}, std::move(tmptr), _gossiper, *sys_dist_ks,
|
||||
std::chrono::milliseconds(_cfg.ring_delay_ms()), true /* add delay */);
|
||||
// Need to artificially update our STATUS so other nodes handle the timestamp change
|
||||
auto status = _gossiper.get_application_state_ptr(
|
||||
utils::fb_utilities::get_broadcast_address(), gms::application_state::STATUS);
|
||||
if (!status) {
|
||||
cdc_log.error("Our STATUS is missing");
|
||||
cdc_log.error("Aborting CDC generation repair due to missing STATUS");
|
||||
co_return;
|
||||
}
|
||||
// Update _gen_ts first, so that do_handle_cdc_generation (which will get called due to the status update)
|
||||
// won't try to update the gossiper, which would result in a deadlock inside add_local_application_state
|
||||
_gen_ts = new_gen_ts;
|
||||
co_await _gossiper.add_local_application_state({
|
||||
{ gms::application_state::CDC_STREAMS_TIMESTAMP, gms::versioned_value::cdc_streams_timestamp(new_gen_ts) },
|
||||
{ gms::application_state::STATUS, *status }
|
||||
});
|
||||
co_await db::system_keyspace::update_cdc_streams_timestamp(new_gen_ts);
|
||||
}
|
||||
|
||||
future<> generation_service::handle_cdc_generation(std::optional<db_clock::time_point> ts) {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
|
||||
if (!ts) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
if (!db::system_keyspace::bootstrap_complete() || !_sys_dist_ks.local_is_initialized()
|
||||
|| !_sys_dist_ks.local().started()) {
|
||||
// The service should not be listening for generation changes until after the node
|
||||
// is bootstrapped. Therefore we would previously assume that this condition
|
||||
// can never become true and call on_internal_error here, but it turns out that
|
||||
// it may become true on decommission: the node enters NEEDS_BOOTSTRAP
|
||||
// state before leaving the token ring, so bootstrap_complete() becomes false.
|
||||
// In that case we can simply return.
|
||||
co_return;
|
||||
}
|
||||
|
||||
if (co_await container().map_reduce(and_reducer(), [ts = *ts] (generation_service& svc) {
|
||||
return !svc._cdc_metadata.prepare(ts);
|
||||
})) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
bool using_this_gen = false;
|
||||
try {
|
||||
using_this_gen = co_await do_handle_cdc_generation_intercept_nonfatal_errors(*ts);
|
||||
} catch (generation_handling_nonfatal_exception& e) {
|
||||
cdc_log.warn(could_not_retrieve_msg_template, ts, e.what(), "retrying in the background");
|
||||
async_handle_cdc_generation(*ts);
|
||||
co_return;
|
||||
} catch (...) {
|
||||
cdc_log.error(could_not_retrieve_msg_template, ts, std::current_exception(), "not retrying");
|
||||
co_return; // Exotic ("fatal") exception => do not retry
|
||||
}
|
||||
|
||||
if (using_this_gen) {
|
||||
cdc_log.info("Starting to use generation {}", *ts);
|
||||
co_await update_streams_description(*ts, get_sys_dist_ks(),
|
||||
[tmptr = _token_metadata.get()] { return tmptr->count_normal_token_owners(); },
|
||||
_abort_src);
|
||||
}
|
||||
}
|
||||
|
||||
void generation_service::async_handle_cdc_generation(db_clock::time_point ts) {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
|
||||
(void)(([] (db_clock::time_point ts, shared_ptr<generation_service> svc) -> future<> {
|
||||
while (true) {
|
||||
co_await sleep_abortable(std::chrono::seconds(5), svc->_abort_src);
|
||||
|
||||
try {
|
||||
bool using_this_gen = co_await svc->do_handle_cdc_generation_intercept_nonfatal_errors(ts);
|
||||
if (using_this_gen) {
|
||||
cdc_log.info("Starting to use generation {}", ts);
|
||||
co_await update_streams_description(ts, svc->get_sys_dist_ks(),
|
||||
[tmptr = svc->_token_metadata.get()] { return tmptr->count_normal_token_owners(); },
|
||||
svc->_abort_src);
|
||||
}
|
||||
co_return;
|
||||
} catch (generation_handling_nonfatal_exception& e) {
|
||||
cdc_log.warn(could_not_retrieve_msg_template, ts, e.what(), "continuing to retry in the background");
|
||||
} catch (...) {
|
||||
cdc_log.error(could_not_retrieve_msg_template, ts, std::current_exception(), "not retrying anymore");
|
||||
co_return; // Exotic ("fatal") exception => do not retry
|
||||
}
|
||||
|
||||
if (co_await svc->container().map_reduce(and_reducer(), [ts] (generation_service& svc) {
|
||||
return svc._cdc_metadata.known_or_obsolete(ts);
|
||||
})) {
|
||||
co_return;
|
||||
}
|
||||
}
|
||||
})(ts, shared_from_this()));
|
||||
}
|
||||
|
||||
future<> generation_service::scan_cdc_generations() {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
|
||||
std::optional<db_clock::time_point> latest;
|
||||
for (const auto& ep: _gossiper.get_endpoint_states()) {
|
||||
auto ts = get_streams_timestamp_for(ep.first, _gossiper);
|
||||
if (!latest || (ts && *ts > *latest)) {
|
||||
latest = ts;
|
||||
}
|
||||
}
|
||||
|
||||
if (latest) {
|
||||
cdc_log.info("Latest generation seen during startup: {}", *latest);
|
||||
co_await handle_cdc_generation(latest);
|
||||
} else {
|
||||
cdc_log.info("No generation seen during startup.");
|
||||
}
|
||||
}
|
||||
|
||||
future<bool> generation_service::do_handle_cdc_generation_intercept_nonfatal_errors(db_clock::time_point ts) {
|
||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||
|
||||
try {
|
||||
co_return co_await do_handle_cdc_generation(ts);
|
||||
} catch (exceptions::request_timeout_exception& e) {
|
||||
throw generation_handling_nonfatal_exception(e.what());
|
||||
} catch (exceptions::unavailable_exception& e) {
|
||||
throw generation_handling_nonfatal_exception(e.what());
|
||||
} catch (exceptions::read_failure_exception& e) {
|
||||
throw generation_handling_nonfatal_exception(e.what());
|
||||
} catch (...) {
|
||||
const auto ep = std::current_exception();
|
||||
if (is_timeout_exception(ep)) {
|
||||
throw generation_handling_nonfatal_exception(format("{}", ep));
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
/* Reads the CDC generation with timestamp `ts` from the distributed system tables,
 * adopts `ts` as the locally gossiped timestamp when it is newer than (or replaces a missing)
 * `_gen_ts`, and inserts the generation into the CDC metadata of every shard.
 *
 * Returns `true` iff the generation was inserted on at least one shard.
 * Throws `std::runtime_error` if the generation cannot be found.
 */
future<bool> generation_service::do_handle_cdc_generation(db_clock::time_point ts) {
    assert_shard_zero(__PRETTY_FUNCTION__);

    auto dist_ks = get_sys_dist_ks();
    auto description = co_await dist_ks->read_cdc_topology_description(
            ts, { _token_metadata.get()->count_normal_token_owners() });
    if (!description) {
        throw std::runtime_error(format(
            "Could not find CDC generation with timestamp {} in distributed system tables (current time: {}),"
            " even though some node gossiped about it.",
            ts, db_clock::now()));
    }

    // We may not be gossiping a generation timestamp of our own (upgrade from a non-CDC/old version,
    // or the timestamp was lost due to a byzantine failure). In that case adopt someone else's timestamp,
    // which avoids the upgrade check on every restart (see `should_propose_first_cdc_generation`).
    // Likewise, when `ts` is higher than our timestamp we switch to gossiping `ts`, so the cluster
    // still remembers it if the node that first gossiped `ts` leaves while `ts` is the latest generation.
    const bool adopt_timestamp = !_gen_ts || *_gen_ts < ts;
    if (adopt_timestamp) {
        _gen_ts = ts;
        co_await db::system_keyspace::update_cdc_streams_timestamp(ts);
        co_await _gossiper.add_local_application_state(
                gms::application_state::CDC_STREAMS_TIMESTAMP, gms::versioned_value::cdc_streams_timestamp(ts));
    }

    // Each shard receives its own copy of the description; the per-shard results are OR-ed together.
    co_return co_await container().map_reduce(or_reducer(), [ts, &description] (generation_service& shard_svc) {
        auto shard_copy = *description;
        return shard_svc._cdc_metadata.insert(ts, std::move(shard_copy));
    });
}
|
||||
|
||||
/* Returns a shared pointer to this shard's system_distributed_keyspace instance.
 * Throws `std::runtime_error` if the local instance is not initialized.
 */
shared_ptr<db::system_distributed_keyspace> generation_service::get_sys_dist_ks() {
    assert_shard_zero(__PRETTY_FUNCTION__);

    if (_sys_dist_ks.local_is_initialized()) {
        return _sys_dist_ks.local_shared();
    }

    throw std::runtime_error("system distributed keyspace not initialized");
}
|
||||
|
||||
} // namespace cdc
|
||||
|
||||
@@ -40,8 +40,6 @@
|
||||
#include "database_fwd.hh"
|
||||
#include "db_clock.hh"
|
||||
#include "dht/token.hh"
|
||||
#include "locator/token_metadata.hh"
|
||||
#include "utils/chunked_vector.hh"
|
||||
|
||||
namespace seastar {
|
||||
class abort_source;
|
||||
@@ -57,6 +55,10 @@ namespace gms {
|
||||
class gossiper;
|
||||
} // namespace gms
|
||||
|
||||
namespace locator {
|
||||
class token_metadata;
|
||||
} // namespace locator
|
||||
|
||||
namespace cdc {
|
||||
|
||||
class stream_id final {
|
||||
@@ -122,19 +124,14 @@ public:
|
||||
*/
|
||||
class streams_version {
|
||||
public:
|
||||
utils::chunked_vector<stream_id> streams;
|
||||
std::vector<stream_id> streams;
|
||||
db_clock::time_point timestamp;
|
||||
std::optional<db_clock::time_point> expired;
|
||||
|
||||
streams_version(utils::chunked_vector<stream_id> s, db_clock::time_point ts)
|
||||
streams_version(std::vector<stream_id> s, db_clock::time_point ts, std::optional<db_clock::time_point> exp)
|
||||
: streams(std::move(s))
|
||||
, timestamp(ts)
|
||||
{}
|
||||
};
|
||||
|
||||
class no_generation_data_exception : public std::runtime_error {
|
||||
public:
|
||||
no_generation_data_exception(db_clock::time_point generation_ts)
|
||||
: std::runtime_error(format("could not find generation data for timestamp {}", generation_ts))
|
||||
, expired(std::move(exp))
|
||||
{}
|
||||
};
|
||||
|
||||
@@ -167,14 +164,14 @@ future<db_clock::time_point> get_local_streams_timestamp();
|
||||
* (not guaranteed in the current implementation, but expected to be the common case;
|
||||
* we assume that `ring_delay` is enough for other nodes to learn about the new generation).
|
||||
*/
|
||||
future<db_clock::time_point> make_new_cdc_generation(
|
||||
db_clock::time_point make_new_cdc_generation(
|
||||
const db::config& cfg,
|
||||
const std::unordered_set<dht::token>& bootstrap_tokens,
|
||||
const locator::token_metadata_ptr tmptr,
|
||||
const locator::token_metadata& tm,
|
||||
const gms::gossiper& g,
|
||||
db::system_distributed_keyspace& sys_dist_ks,
|
||||
std::chrono::milliseconds ring_delay,
|
||||
bool add_delay);
|
||||
bool for_testing);
|
||||
|
||||
/* Retrieves CDC streams generation timestamp from the given endpoint's application state (broadcasted through gossip).
|
||||
* We might be during a rolling upgrade, so the timestamp might not be there (if the other node didn't upgrade yet),
|
||||
@@ -190,22 +187,13 @@ std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_ad
|
||||
*
|
||||
* Returning from this function does not mean that the table update was successful: the function
|
||||
* might run an asynchronous task in the background.
|
||||
*
|
||||
* Run inside seastar::async context.
|
||||
*/
|
||||
future<> update_streams_description(
|
||||
void update_streams_description(
|
||||
db_clock::time_point,
|
||||
shared_ptr<db::system_distributed_keyspace>,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source&);
|
||||
|
||||
/* Part of the upgrade procedure. Useful in case where the version of Scylla that we're upgrading from
|
||||
* used the "cdc_streams_descriptions" table. This procedure ensures that the new "cdc_streams_descriptions_v2"
|
||||
* table contains streams of all generations that were present in the old table and may still contain data
|
||||
* (i.e. there exist CDC log tables that may contain rows with partition keys being the stream IDs from
|
||||
* these generations). */
|
||||
future<> maybe_rewrite_streams_descriptions(
|
||||
const database&,
|
||||
shared_ptr<db::system_distributed_keyspace>,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source&);
|
||||
|
||||
} // namespace cdc
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
* Copyright (C) 2021 ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cdc/metadata.hh"
|
||||
#include "gms/i_endpoint_state_change_subscriber.hh"
|
||||
|
||||
namespace db {
|
||||
class system_distributed_keyspace;
|
||||
}
|
||||
|
||||
namespace gms {
|
||||
class gossiper;
|
||||
}
|
||||
|
||||
namespace cdc {
|
||||
|
||||
class generation_service : public peering_sharded_service<generation_service>
                         , public async_sharded_service<generation_service>
                         , public gms::i_endpoint_state_change_subscriber {

    bool _stopped = false;

    // Whether the node has joined the token ring; becomes `true` when `after_join` is called.
    bool _joined = false;

    const db::config& _cfg;
    gms::gossiper& _gossiper;
    sharded<db::system_distributed_keyspace>& _sys_dist_ks;
    abort_source& _abort_src;
    const locator::shared_token_metadata& _token_metadata;

    /* The set of known CDC generations. Streams for log writes (i.e. the partition keys
     * of those writes) are picked from it.
     * It is updated in response to certain gossip events (see the handle_cdc_generation function).
     */
    cdc::metadata _cdc_metadata;

    /* The latest known generation timestamp, which is also the timestamp we are currently
     * gossiping as the CDC_STREAMS_TIMESTAMP application state.
     *
     * Managed by shard 0 only, so it is std::nullopt on every other shard.
     * The timestamp is persisted in the system.cdc_local table as well.
     *
     * On shard 0 it can be nullopt in exactly one special case: a rolling upgrade from an old
     * version of Scylla that didn't support CDC. Then some node in the cluster (possibly us)
     * creates the first generation and starts gossiping it. Eventually - once one of the nodes
     * gossips the first timestamp - we catch on and this variable is updated with that generation.
     */
    std::optional<db_clock::time_point> _gen_ts;
public:
    generation_service(const db::config&, gms::gossiper&,
            sharded<db::system_distributed_keyspace>&, abort_source&, const locator::shared_token_metadata&);

    future<> stop();
    ~generation_service();

    /* Hands the responsibility for managing generations over from the node startup code to this
     * service; the service stays dormant until this is called.
     *
     * Call it once the node has bootstrapped and created a new CDC generation, or has restarted and
     * loaded the last known generation timestamp from persistent storage. That timestamp is moved in
     * as `startup_gen_ts`.
     * At the time of writing, the startup code lives in `storage_service::join_token_ring`, so
     * `after_join` should be called at the end of that function.
     *
     * Precondition: the node has completed bootstrapping and system_distributed_keyspace is initialized.
     * Must be called on shard 0 - that's where the generation management happens.
     */
    future<> after_join(std::optional<db_clock::time_point>&& startup_gen_ts);

    cdc::metadata& get_cdc_metadata() {
        return _cdc_metadata;
    }

    // Gossip notifications this service does not react to.
    void before_change(gms::inet_address, gms::endpoint_state, gms::application_state, const gms::versioned_value&) override {}
    void on_alive(gms::inet_address, gms::endpoint_state) override {}
    void on_dead(gms::inet_address, gms::endpoint_state) override {}
    void on_remove(gms::inet_address) override {}
    void on_restart(gms::inet_address, gms::endpoint_state) override {}

    // Gossip notifications implemented out of line.
    void on_join(gms::inet_address, gms::endpoint_state) override;
    void on_change(gms::inet_address, gms::application_state, const gms::versioned_value&) override;

    future<> check_and_repair_cdc_streams();

private:
    /* Fetches the CDC generation starting at the given timestamp (from a distributed table created
     * for this purpose) and starts using it for CDC log writes unless it is obsolete.
     */
    future<> handle_cdc_generation(std::optional<db_clock::time_point>);

    /* Background retry used by `handle_cdc_generation`: when that function fails, it schedules
     * an asynchronous retry through this one.
     */
    void async_handle_cdc_generation(db_clock::time_point);

    /* Wraps `do_handle_cdc_generation`, intercepting timeout/unavailability exceptions.
     * Returns: do_handle_cdc_generation(ts). */
    future<bool> do_handle_cdc_generation_intercept_nonfatal_errors(db_clock::time_point);

    /* Returns `true` iff we started using the generation (it was neither obsolete nor already known),
     * meaning this node might write some CDC log entries using streams from this generation. */
    future<bool> do_handle_cdc_generation(db_clock::time_point);

    /* Scans the CDC generation timestamps gossiped by other nodes and retrieves the latest one.
     * Should be called once, at the end of the node startup procedure; after the node is started
     * and running normally, generations are retrieved on gossip events instead.
     */
    future<> scan_cdc_generations();

    /* generation_service code might race with system_distributed_keyspace deinitialization
     * (the deinitialization order is broken).
     * Hence any background task that wants to access sys_dist_ks must first check that the
     * instance is still there. Holding on to the returned shared pointer keeps it alive.
     */
    shared_ptr<db::system_distributed_keyspace> get_sys_dist_ks();
};
|
||||
|
||||
} // namespace cdc
|
||||
104
cdc/log.cc
104
cdc/log.cc
@@ -32,7 +32,6 @@
|
||||
#include "cdc/split.hh"
|
||||
#include "cdc/cdc_options.hh"
|
||||
#include "cdc/change_visitor.hh"
|
||||
#include "cdc/metadata.hh"
|
||||
#include "bytes.hh"
|
||||
#include "database.hh"
|
||||
#include "db/config.hh"
|
||||
@@ -49,9 +48,6 @@
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "log.hh"
|
||||
#include "utils/rjson.hh"
|
||||
#include "utils/UUID_gen.hh"
|
||||
#include "utils/managed_bytes.hh"
|
||||
#include "utils/fragment_range.hh"
|
||||
#include "types.hh"
|
||||
#include "concrete_types.hh"
|
||||
#include "types/listlike_partial_deserializing_iterator.hh"
|
||||
@@ -74,7 +70,7 @@ using namespace std::chrono_literals;
|
||||
logging::logger cdc_log("cdc");
|
||||
|
||||
namespace cdc {
|
||||
static schema_ptr create_log_schema(const schema&, std::optional<utils::UUID> = {}, schema_ptr = nullptr);
|
||||
static schema_ptr create_log_schema(const schema&, std::optional<utils::UUID> = {});
|
||||
}
|
||||
|
||||
static constexpr auto cdc_group_name = "cdc";
|
||||
@@ -221,10 +217,10 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
auto new_log_schema = create_log_schema(new_schema, log_schema ? std::make_optional(log_schema->id()) : std::nullopt, log_schema);
|
||||
auto new_log_schema = create_log_schema(new_schema, log_schema ? std::make_optional(log_schema->id()) : std::nullopt);
|
||||
|
||||
auto log_mut = log_schema
|
||||
? db::schema_tables::make_update_table_mutations(db, keyspace.metadata(), log_schema, new_log_schema, timestamp, false)
|
||||
? db::schema_tables::make_update_table_mutations(keyspace.metadata(), log_schema, new_log_schema, timestamp, false)
|
||||
: db::schema_tables::make_create_table_mutations(keyspace.metadata(), new_log_schema, timestamp)
|
||||
;
|
||||
|
||||
@@ -281,8 +277,8 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
cdc::cdc_service::cdc_service(service::storage_proxy& proxy, cdc::metadata& cdc_metadata)
|
||||
: cdc_service(db_context::builder(proxy, cdc_metadata).build())
|
||||
cdc::cdc_service::cdc_service(service::storage_proxy& proxy)
|
||||
: cdc_service(db_context::builder(proxy).build())
|
||||
{}
|
||||
|
||||
cdc::cdc_service::cdc_service(db_context ctxt)
|
||||
@@ -490,7 +486,7 @@ bytes log_data_column_deleted_elements_name_bytes(const bytes& column_name) {
|
||||
return to_bytes(cdc_deleted_elements_column_prefix) + column_name;
|
||||
}
|
||||
|
||||
static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID> uuid, schema_ptr old) {
|
||||
static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID> uuid) {
|
||||
schema_builder b(s.ks_name(), log_name(s.cf_name()));
|
||||
b.with_partitioner("com.scylladb.dht.CDCPartitioner");
|
||||
b.set_compaction_strategy(sstables::compaction_strategy_type::time_window);
|
||||
@@ -571,25 +567,11 @@ static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID>
|
||||
b.set_uuid(*uuid);
|
||||
}
|
||||
|
||||
/**
|
||||
* #10473 - if we are redefining the log table, we need to ensure any dropped
|
||||
* columns are registered in "dropped_columns" table, otherwise clients will not
|
||||
* be able to read data older than now.
|
||||
*/
|
||||
if (old) {
|
||||
// not super efficient, but we don't do this often.
|
||||
for (auto& col : old->all_columns()) {
|
||||
if (!b.has_column({col.name(), col.name_as_text() })) {
|
||||
b.without_column(col.name_as_text(), col.type, api::new_timestamp());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return b.build();
|
||||
}
|
||||
|
||||
db_context::builder::builder(service::storage_proxy& proxy, cdc::metadata& cdc_metadata)
|
||||
: _proxy(proxy), _cdc_metadata(cdc_metadata)
|
||||
db_context::builder::builder(service::storage_proxy& proxy)
|
||||
: _proxy(proxy)
|
||||
{}
|
||||
|
||||
db_context::builder& db_context::builder::with_migration_notifier(service::migration_notifier& migration_notifier) {
|
||||
@@ -597,24 +579,28 @@ db_context::builder& db_context::builder::with_migration_notifier(service::migra
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Records the token metadata the built context should reference; returns the builder for chaining.
db_context::builder& db_context::builder::with_token_metadata(const locator::token_metadata& token_metadata) {
    _token_metadata.emplace(token_metadata);
    return *this;
}
|
||||
|
||||
// Records the CDC metadata the built context should reference; returns the builder for chaining.
db_context::builder& db_context::builder::with_cdc_metadata(cdc::metadata& cdc_metadata) {
    _cdc_metadata.emplace(cdc_metadata);
    return *this;
}
|
||||
|
||||
db_context db_context::builder::build() {
|
||||
return db_context{
|
||||
_proxy,
|
||||
_migration_notifier ? _migration_notifier->get() : service::get_local_storage_service().get_migration_notifier(),
|
||||
_cdc_metadata,
|
||||
_token_metadata ? _token_metadata->get() : service::get_local_storage_service().get_token_metadata(),
|
||||
_cdc_metadata ? _cdc_metadata->get() : service::get_local_storage_service().get_cdc_metadata(),
|
||||
};
|
||||
}
|
||||
|
||||
// iterators for collection merge
|
||||
template<typename T>
|
||||
class collection_iterator {
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = const T;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = const T*;
|
||||
using reference = const T&;
|
||||
private:
|
||||
class collection_iterator : public std::iterator<std::input_iterator_tag, const T> {
|
||||
bytes_view _v, _next;
|
||||
size_t _rem = 0;
|
||||
T _current;
|
||||
@@ -679,14 +665,6 @@ void collection_iterator<bytes_view>::parse() {
|
||||
_current = k;
|
||||
}
|
||||
|
||||
template<>
|
||||
void collection_iterator<managed_bytes_view>::parse() {
|
||||
assert(_rem > 0);
|
||||
_next = _v;
|
||||
auto k = read_collection_value(_next, cql_serialization_format::internal());
|
||||
_current = k;
|
||||
}
|
||||
|
||||
template<typename Container, typename T>
|
||||
class maybe_back_insert_iterator : public std::back_insert_iterator<Container> {
|
||||
const abstract_type& _type;
|
||||
@@ -730,16 +708,16 @@ private:
|
||||
}
|
||||
return false;
|
||||
}
|
||||
int32_t compare(const T&, const value_type& v);
|
||||
bool compare(const T&, const value_type& v);
|
||||
};
|
||||
|
||||
template<>
|
||||
int32_t maybe_back_insert_iterator<std::vector<std::pair<bytes_view, bytes_view>>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
|
||||
bool maybe_back_insert_iterator<std::vector<std::pair<bytes_view, bytes_view>>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
|
||||
return _type.compare(t, v.first);
|
||||
}
|
||||
|
||||
template<>
|
||||
int32_t maybe_back_insert_iterator<std::vector<bytes_view>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
|
||||
bool maybe_back_insert_iterator<std::vector<bytes_view>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
|
||||
return _type.compare(t, v);
|
||||
}
|
||||
|
||||
@@ -787,18 +765,18 @@ static bytes merge(const set_type_impl& ctype, const bytes_opt& prev, const byte
|
||||
return set_type_impl::serialize_partially_deserialized_form(res, cql_serialization_format::internal());
|
||||
}
|
||||
static bytes merge(const user_type_impl& type, const bytes_opt& prev, const bytes_opt& next, const bytes_opt& deleted) {
|
||||
std::vector<managed_bytes_view_opt> res(type.size());
|
||||
udt_for_each(prev, [&res, i = res.begin()](managed_bytes_view_opt k) mutable {
|
||||
std::vector<bytes_view_opt> res(type.size());
|
||||
udt_for_each(prev, [&res, i = res.begin()](bytes_view_opt k) mutable {
|
||||
*i++ = k;
|
||||
});
|
||||
udt_for_each(next, [&res, i = res.begin()](managed_bytes_view_opt k) mutable {
|
||||
udt_for_each(next, [&res, i = res.begin()](bytes_view_opt k) mutable {
|
||||
if (k) {
|
||||
*i = k;
|
||||
}
|
||||
++i;
|
||||
});
|
||||
collection_iterator<managed_bytes_view> e, d(deleted);
|
||||
std::for_each(d, e, [&res](managed_bytes_view k) {
|
||||
collection_iterator<bytes_view> e, d(deleted);
|
||||
std::for_each(d, e, [&res](bytes_view k) {
|
||||
auto index = deserialize_field_index(k);
|
||||
res[index] = std::nullopt;
|
||||
});
|
||||
@@ -837,13 +815,13 @@ static bytes_opt get_preimage_col_value(const column_definition& cdef, const cql
|
||||
auto v = pirow->get_view(cdef.name_as_text());
|
||||
auto f = cql_serialization_format::internal();
|
||||
auto n = read_collection_size(v, f);
|
||||
std::vector<bytes> tmp;
|
||||
std::vector<bytes_view> tmp;
|
||||
tmp.reserve(n);
|
||||
while (n--) {
|
||||
tmp.emplace_back(read_collection_value(v, f).linearize()); // key
|
||||
tmp.emplace_back(read_collection_value(v, f)); // key
|
||||
read_collection_value(v, f); // value. ignore.
|
||||
}
|
||||
return set_type_impl::serialize_partially_deserialized_form({tmp.begin(), tmp.end()}, f);
|
||||
return set_type_impl::serialize_partially_deserialized_form(tmp, f);
|
||||
},
|
||||
[&] (const abstract_type& o) -> bytes {
|
||||
return pirow->get_blob(cdef.name_as_text());
|
||||
@@ -999,13 +977,13 @@ private:
|
||||
};
|
||||
|
||||
static bytes get_bytes(const atomic_cell_view& acv) {
|
||||
return to_bytes(acv.value());
|
||||
return acv.value().linearize();
|
||||
}
|
||||
|
||||
static bytes_view get_bytes_view(const atomic_cell_view& acv, std::forward_list<bytes>& buf) {
|
||||
return acv.value().is_fragmented()
|
||||
? bytes_view{buf.emplace_front(to_bytes(acv.value()))}
|
||||
: acv.value().current_fragment();
|
||||
? bytes_view{buf.emplace_front(acv.value().linearize())}
|
||||
: acv.value().first_fragment();
|
||||
}
|
||||
|
||||
static ttl_opt get_ttl(const atomic_cell_view& acv) {
|
||||
@@ -1158,7 +1136,7 @@ struct process_row_visitor {
|
||||
_touched_parts.set<stats::part_type::UDT>();
|
||||
|
||||
struct udt_visitor : public collection_visitor {
|
||||
std::vector<bytes_view_opt> _added_cells;
|
||||
std::vector<bytes_opt> _added_cells;
|
||||
std::forward_list<bytes>& _buf;
|
||||
|
||||
udt_visitor(ttl_opt& ttl_column, size_t num_keys, std::forward_list<bytes>& buf)
|
||||
@@ -1670,7 +1648,13 @@ public:
|
||||
try {
|
||||
return _ctx._proxy.query(_schema, std::move(command), std::move(partition_ranges), select_cl, service::storage_proxy::coordinator_query_options(default_timeout(), empty_service_permit(), client_state)).then(
|
||||
[s = _schema, partition_slice = std::move(partition_slice), selection = std::move(selection)] (service::storage_proxy::coordinator_query_result qr) -> lw_shared_ptr<cql3::untyped_result_set> {
|
||||
return make_lw_shared<cql3::untyped_result_set>(*s, std::move(qr.query_result), *selection, partition_slice);
|
||||
cql3::selection::result_set_builder builder(*selection, gc_clock::now(), cql_serialization_format::latest());
|
||||
query::result_view::consume(*qr.query_result, partition_slice, cql3::selection::result_set_builder::visitor(builder, *s, *selection));
|
||||
auto result_set = builder.build();
|
||||
if (!result_set || result_set->empty()) {
|
||||
return {};
|
||||
}
|
||||
return make_lw_shared<cql3::untyped_result_set>(*result_set);
|
||||
});
|
||||
} catch (exceptions::unavailable_exception& e) {
|
||||
// `query` can throw `unavailable_exception`, which is seen by clients as ~ "NoHostAvailable".
|
||||
@@ -1714,7 +1698,7 @@ public:
|
||||
// as there will be no clustering row data to load into the state.
|
||||
return;
|
||||
}
|
||||
ck_parts.emplace_back(v->linearize());
|
||||
ck_parts.emplace_back(*v);
|
||||
}
|
||||
auto ck = clustering_key::from_exploded(std::move(ck_parts));
|
||||
|
||||
|
||||
10
cdc/log.hh
10
cdc/log.hh
@@ -80,7 +80,7 @@ class cdc_service final : public async_sharded_service<cdc::cdc_service> {
|
||||
std::unique_ptr<impl> _impl;
|
||||
public:
|
||||
future<> stop();
|
||||
cdc_service(service::storage_proxy&, cdc::metadata&);
|
||||
cdc_service(service::storage_proxy&);
|
||||
cdc_service(db_context);
|
||||
~cdc_service();
|
||||
|
||||
@@ -100,16 +100,20 @@ public:
|
||||
struct db_context final {
|
||||
service::storage_proxy& _proxy;
|
||||
service::migration_notifier& _migration_notifier;
|
||||
const locator::token_metadata& _token_metadata;
|
||||
cdc::metadata& _cdc_metadata;
|
||||
|
||||
class builder final {
|
||||
service::storage_proxy& _proxy;
|
||||
cdc::metadata& _cdc_metadata;
|
||||
std::optional<std::reference_wrapper<service::migration_notifier>> _migration_notifier;
|
||||
std::optional<std::reference_wrapper<const locator::token_metadata>> _token_metadata;
|
||||
std::optional<std::reference_wrapper<cdc::metadata>> _cdc_metadata;
|
||||
public:
|
||||
builder(service::storage_proxy& proxy, cdc::metadata&);
|
||||
builder(service::storage_proxy& proxy);
|
||||
|
||||
builder& with_migration_notifier(service::migration_notifier& migration_notifier);
|
||||
builder& with_token_metadata(const locator::token_metadata& token_metadata);
|
||||
builder& with_cdc_metadata(cdc::metadata&);
|
||||
|
||||
db_context build();
|
||||
};
|
||||
|
||||
@@ -31,7 +31,10 @@ class checked_file_impl : public file_impl {
|
||||
public:
|
||||
|
||||
checked_file_impl(const io_error_handler& error_handler, file f)
|
||||
: file_impl(*get_file_impl(f)), _error_handler(error_handler), _file(f) {
|
||||
: _error_handler(error_handler), _file(f) {
|
||||
_memory_dma_alignment = f.memory_dma_alignment();
|
||||
_disk_read_dma_alignment = f.disk_read_dma_alignment();
|
||||
_disk_write_dma_alignment = f.disk_write_dma_alignment();
|
||||
}
|
||||
|
||||
virtual future<size_t> write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) override {
|
||||
|
||||
@@ -67,8 +67,8 @@ public:
|
||||
int operator()(const clustering_key_prefix& p1, int32_t w1, const clustering_key_prefix& p2, int32_t w2) const {
|
||||
auto type = _s.get().clustering_key_prefix_type();
|
||||
auto res = prefix_equality_tri_compare(type->types().begin(),
|
||||
type->begin(p1.representation()), type->end(p1.representation()),
|
||||
type->begin(p2.representation()), type->end(p2.representation()),
|
||||
type->begin(p1), type->end(p1),
|
||||
type->begin(p2), type->end(p2),
|
||||
::tri_compare);
|
||||
if (res) {
|
||||
return res;
|
||||
|
||||
@@ -72,14 +72,7 @@ public:
|
||||
}
|
||||
return result;
|
||||
}
|
||||
class position_range_iterator {
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = const position_range;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = const position_range*;
|
||||
using reference = const position_range&;
|
||||
private:
|
||||
class position_range_iterator : public std::iterator<std::input_iterator_tag, const position_range> {
|
||||
set_type::iterator _i;
|
||||
public:
|
||||
position_range_iterator(set_type::iterator i) : _i(i) {}
|
||||
|
||||
@@ -65,11 +65,6 @@ private:
|
||||
_current_start = position_in_partition_view::for_range_start(_current_range.front());
|
||||
_current_end = position_in_partition_view::for_range_end(_current_range.front());
|
||||
}
|
||||
} else {
|
||||
// If the first range is contiguous with the static row, then advance _current_end as much as we can
|
||||
if (_current_range && !_current_range.front().start()) {
|
||||
_current_end = position_in_partition_view::for_range_end(_current_range.front());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "types/collection.hh"
|
||||
#include "types/user.hh"
|
||||
#include "concrete_types.hh"
|
||||
#include "atomic_cell_or_collection.hh"
|
||||
#include "mutation_partition.hh"
|
||||
#include "compaction_garbage_collector.hh"
|
||||
#include "combine.hh"
|
||||
@@ -29,28 +30,40 @@
|
||||
#include "collection_mutation.hh"
|
||||
|
||||
collection_mutation::collection_mutation(const abstract_type& type, collection_mutation_view v)
|
||||
: _data(v.data) {}
|
||||
: _data(imr_object_type::make(data::cell::make_collection(v.data), &type.imr_state().lsa_migrator())) {}
|
||||
|
||||
collection_mutation::collection_mutation(const abstract_type& type, managed_bytes data)
|
||||
: _data(std::move(data)) {}
|
||||
collection_mutation::collection_mutation(const abstract_type& type, const bytes_ostream& data)
|
||||
: _data(imr_object_type::make(data::cell::make_collection(fragment_range_view(data)), &type.imr_state().lsa_migrator())) {}
|
||||
|
||||
// Builds a collection_mutation_view over the cell stored at `ptr`:
// the cell's flags are read first, then the cell is interpreted as a collection
// and its variable-size value is wrapped in a view.
static collection_mutation_view get_collection_mutation_view(const uint8_t* ptr)
{
    namespace dc = data::cell;

    auto cell_flags = dc::structure::get_member<dc::tags::flags>(ptr);
    auto collection_info = data::type_info::make_collection();
    dc::context cell_ctx(cell_flags, collection_info);

    auto collection_cell = dc::structure::get_member<dc::tags::cell>(ptr)
            .as<dc::tags::collection>(cell_ctx);
    auto value_view = dc::variable_value::make_view(
            collection_cell, cell_flags.get<dc::tags::external_data>());

    return collection_mutation_view { value_view };
}
|
||||
|
||||
collection_mutation::operator collection_mutation_view() const
|
||||
{
|
||||
return collection_mutation_view{managed_bytes_view(_data)};
|
||||
return get_collection_mutation_view(_data.get());
|
||||
}
|
||||
|
||||
collection_mutation_view atomic_cell_or_collection::as_collection_mutation() const {
|
||||
return collection_mutation_view{managed_bytes_view(_data)};
|
||||
return get_collection_mutation_view(_data.get());
|
||||
}
|
||||
|
||||
bool collection_mutation_view::is_empty() const {
|
||||
auto in = collection_mutation_input_stream(fragment_range(data));
|
||||
auto in = collection_mutation_input_stream(data);
|
||||
auto has_tomb = in.read_trivial<bool>();
|
||||
return !has_tomb && in.read_trivial<uint32_t>() == 0;
|
||||
}
|
||||
|
||||
bool collection_mutation_view::is_any_live(const abstract_type& type, tombstone tomb, gc_clock::time_point now) const {
|
||||
auto in = collection_mutation_input_stream(fragment_range(data));
|
||||
template <typename F>
|
||||
requires std::is_invocable_r_v<const data::type_info&, F, collection_mutation_input_stream&>
|
||||
static bool is_any_live(const atomic_cell_value_view& data, tombstone tomb, gc_clock::time_point now, F&& read_cell_type_info) {
|
||||
auto in = collection_mutation_input_stream(data);
|
||||
auto has_tomb = in.read_trivial<bool>();
|
||||
if (has_tomb) {
|
||||
auto ts = in.read_trivial<api::timestamp_type>();
|
||||
@@ -60,10 +73,9 @@ bool collection_mutation_view::is_any_live(const abstract_type& type, tombstone
|
||||
|
||||
auto nr = in.read_trivial<uint32_t>();
|
||||
for (uint32_t i = 0; i != nr; ++i) {
|
||||
auto key_size = in.read_trivial<uint32_t>();
|
||||
in.skip(key_size);
|
||||
auto& type_info = read_cell_type_info(in);
|
||||
auto vsize = in.read_trivial<uint32_t>();
|
||||
auto value = atomic_cell_view::from_bytes(type, in.read(vsize));
|
||||
auto value = atomic_cell_view::from_bytes(type_info, in.read(vsize));
|
||||
if (value.is_live(tomb, now, false)) {
|
||||
return true;
|
||||
}
|
||||
@@ -72,8 +84,33 @@ bool collection_mutation_view::is_any_live(const abstract_type& type, tombstone
|
||||
return false;
|
||||
}
|
||||
|
||||
api::timestamp_type collection_mutation_view::last_update(const abstract_type& type) const {
|
||||
auto in = collection_mutation_input_stream(fragment_range(data));
|
||||
bool collection_mutation_view::is_any_live(const abstract_type& type, tombstone tomb, gc_clock::time_point now) const {
|
||||
return visit(type, make_visitor(
|
||||
[&] (const collection_type_impl& ctype) {
|
||||
auto& type_info = ctype.value_comparator()->imr_state().type_info();
|
||||
return ::is_any_live(data, tomb, now, [&type_info] (collection_mutation_input_stream& in) -> const data::type_info& {
|
||||
auto key_size = in.read_trivial<uint32_t>();
|
||||
in.skip(key_size);
|
||||
return type_info;
|
||||
});
|
||||
},
|
||||
[&] (const user_type_impl& utype) {
|
||||
return ::is_any_live(data, tomb, now, [&utype] (collection_mutation_input_stream& in) -> const data::type_info& {
|
||||
auto key_size = in.read_trivial<uint32_t>();
|
||||
auto key = in.read(key_size);
|
||||
return utype.type(deserialize_field_index(key))->imr_state().type_info();
|
||||
});
|
||||
},
|
||||
[&] (const abstract_type& o) -> bool {
|
||||
throw std::runtime_error(format("collection_mutation_view::is_any_live: unknown type {}", o.name()));
|
||||
}
|
||||
));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
requires std::is_invocable_r_v<const data::type_info&, F, collection_mutation_input_stream&>
|
||||
static api::timestamp_type last_update(const atomic_cell_value_view& data, F&& read_cell_type_info) {
|
||||
auto in = collection_mutation_input_stream(data);
|
||||
api::timestamp_type max = api::missing_timestamp;
|
||||
auto has_tomb = in.read_trivial<bool>();
|
||||
if (has_tomb) {
|
||||
@@ -83,16 +120,39 @@ api::timestamp_type collection_mutation_view::last_update(const abstract_type& t
|
||||
|
||||
auto nr = in.read_trivial<uint32_t>();
|
||||
for (uint32_t i = 0; i != nr; ++i) {
|
||||
const auto key_size = in.read_trivial<uint32_t>();
|
||||
in.skip(key_size);
|
||||
auto& type_info = read_cell_type_info(in);
|
||||
auto vsize = in.read_trivial<uint32_t>();
|
||||
auto value = atomic_cell_view::from_bytes(type, in.read(vsize));
|
||||
auto value = atomic_cell_view::from_bytes(type_info, in.read(vsize));
|
||||
max = std::max(value.timestamp(), max);
|
||||
}
|
||||
|
||||
return max;
|
||||
}
|
||||
|
||||
|
||||
api::timestamp_type collection_mutation_view::last_update(const abstract_type& type) const {
|
||||
return visit(type, make_visitor(
|
||||
[&] (const collection_type_impl& ctype) {
|
||||
auto& type_info = ctype.value_comparator()->imr_state().type_info();
|
||||
return ::last_update(data, [&type_info] (collection_mutation_input_stream& in) -> const data::type_info& {
|
||||
auto key_size = in.read_trivial<uint32_t>();
|
||||
in.skip(key_size);
|
||||
return type_info;
|
||||
});
|
||||
},
|
||||
[&] (const user_type_impl& utype) {
|
||||
return ::last_update(data, [&utype] (collection_mutation_input_stream& in) -> const data::type_info& {
|
||||
auto key_size = in.read_trivial<uint32_t>();
|
||||
auto key = in.read(key_size);
|
||||
return utype.type(deserialize_field_index(key))->imr_state().type_info();
|
||||
});
|
||||
},
|
||||
[&] (const abstract_type& o) -> api::timestamp_type {
|
||||
throw std::runtime_error(format("collection_mutation_view::last_update: unknown type {}", o.name()));
|
||||
}
|
||||
));
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const collection_mutation_view::printer& cmvp) {
|
||||
fmt::print(os, "{{collection_mutation_view ");
|
||||
cmvp._cmv.with_deserialized(cmvp._type, [&os, &type = cmvp._type] (const collection_mutation_view_description& cmvd) {
|
||||
@@ -218,31 +278,28 @@ static collection_mutation serialize_collection_mutation(
|
||||
auto size = accumulate(cells, (size_t)4, element_size);
|
||||
size += 1;
|
||||
if (tomb) {
|
||||
size += sizeof(int64_t) + sizeof(int64_t);
|
||||
size += sizeof(tomb.timestamp) + sizeof(tomb.deletion_time);
|
||||
}
|
||||
managed_bytes ret(managed_bytes::initialized_later(), size);
|
||||
managed_bytes_mutable_view out(ret);
|
||||
write<uint8_t>(out, uint8_t(bool(tomb)));
|
||||
bytes_ostream ret;
|
||||
ret.reserve(size);
|
||||
auto out = ret.write_begin();
|
||||
*out++ = bool(tomb);
|
||||
if (tomb) {
|
||||
write<int64_t>(out, tomb.timestamp);
|
||||
write<int64_t>(out, tomb.deletion_time.time_since_epoch().count());
|
||||
write(out, tomb.timestamp);
|
||||
write(out, tomb.deletion_time.time_since_epoch().count());
|
||||
}
|
||||
auto writek = [&out] (bytes_view v) {
|
||||
write<int32_t>(out, v.size());
|
||||
write_fragmented(out, single_fragmented_view(v));
|
||||
};
|
||||
auto writev = [&out] (managed_bytes_view v) {
|
||||
write<int32_t>(out, v.size());
|
||||
write_fragmented(out, v);
|
||||
auto writeb = [&out] (bytes_view v) {
|
||||
serialize_int32(out, v.size());
|
||||
out = std::copy_n(v.begin(), v.size(), out);
|
||||
};
|
||||
// FIXME: overflow?
|
||||
write<int32_t>(out, boost::distance(cells));
|
||||
serialize_int32(out, boost::distance(cells));
|
||||
for (auto&& kv : cells) {
|
||||
auto&& k = kv.first;
|
||||
auto&& v = kv.second;
|
||||
writek(k);
|
||||
writeb(k);
|
||||
|
||||
writev(v.serialize());
|
||||
writeb(v.serialize());
|
||||
}
|
||||
return collection_mutation(type, ret);
|
||||
}
|
||||
@@ -391,12 +448,13 @@ deserialize_collection_mutation(const abstract_type& type, collection_mutation_i
|
||||
return visit(type, make_visitor(
|
||||
[&] (const collection_type_impl& ctype) {
|
||||
// value_comparator(), ugh
|
||||
return deserialize_collection_mutation(in, [&ctype] (collection_mutation_input_stream& in) {
|
||||
auto& type_info = ctype.value_comparator()->imr_state().type_info();
|
||||
return deserialize_collection_mutation(in, [&type_info] (collection_mutation_input_stream& in) {
|
||||
// FIXME: we could probably avoid the need for size
|
||||
auto ksize = in.read_trivial<uint32_t>();
|
||||
auto key = in.read(ksize);
|
||||
auto vsize = in.read_trivial<uint32_t>();
|
||||
auto value = atomic_cell_view::from_bytes(*ctype.value_comparator(), in.read(vsize));
|
||||
auto value = atomic_cell_view::from_bytes(type_info, in.read(vsize));
|
||||
return std::make_pair(key, value);
|
||||
});
|
||||
},
|
||||
@@ -406,7 +464,8 @@ deserialize_collection_mutation(const abstract_type& type, collection_mutation_i
|
||||
auto ksize = in.read_trivial<uint32_t>();
|
||||
auto key = in.read(ksize);
|
||||
auto vsize = in.read_trivial<uint32_t>();
|
||||
auto value = atomic_cell_view::from_bytes(*utype.type(deserialize_field_index(key)), in.read(vsize));
|
||||
auto value = atomic_cell_view::from_bytes(
|
||||
utype.type(deserialize_field_index(key))->imr_state().type_info(), in.read(vsize));
|
||||
return std::make_pair(key, value);
|
||||
});
|
||||
},
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include <iosfwd>
|
||||
|
||||
class abstract_type;
|
||||
class bytes_ostream;
|
||||
class compaction_garbage_collector;
|
||||
class row_tombstone;
|
||||
|
||||
@@ -69,7 +70,7 @@ struct collection_mutation_view_description {
|
||||
collection_mutation serialize(const abstract_type&) const;
|
||||
};
|
||||
|
||||
using collection_mutation_input_stream = utils::linearizing_input_stream<fragment_range<managed_bytes_view>, marshal_exception>;
|
||||
using collection_mutation_input_stream = utils::linearizing_input_stream<atomic_cell_value_view, marshal_exception>;
|
||||
|
||||
// Given a linearized collection_mutation_view, returns an auxiliary struct allowing the inspection of each cell.
|
||||
// The struct is an observer of the data given by the collection_mutation_view and is only valid while the
|
||||
@@ -79,7 +80,7 @@ collection_mutation_view_description deserialize_collection_mutation(const abstr
|
||||
|
||||
class collection_mutation_view {
|
||||
public:
|
||||
managed_bytes_view data;
|
||||
atomic_cell_value_view data;
|
||||
|
||||
// Is this a noop mutation?
|
||||
bool is_empty() const;
|
||||
@@ -96,7 +97,7 @@ public:
|
||||
// calls it on the corresponding description of `this`.
|
||||
template <typename F>
|
||||
inline decltype(auto) with_deserialized(const abstract_type& type, F f) const {
|
||||
auto stream = collection_mutation_input_stream(fragment_range(data));
|
||||
auto stream = collection_mutation_input_stream(data);
|
||||
return f(deserialize_collection_mutation(type, stream));
|
||||
}
|
||||
|
||||
@@ -121,11 +122,12 @@ public:
|
||||
// The mutation may also contain a collection-wide tombstone.
|
||||
class collection_mutation {
|
||||
public:
|
||||
managed_bytes _data;
|
||||
using imr_object_type = imr::utils::object<data::cell::structure>;
|
||||
imr_object_type _data;
|
||||
|
||||
collection_mutation() {}
|
||||
collection_mutation(const abstract_type&, collection_mutation_view);
|
||||
collection_mutation(const abstract_type&, managed_bytes);
|
||||
collection_mutation(const abstract_type& type, const bytes_ostream& data);
|
||||
operator collection_mutation_view() const;
|
||||
};
|
||||
|
||||
@@ -134,4 +136,4 @@ collection_mutation merge(const abstract_type&, collection_mutation_view, collec
|
||||
collection_mutation difference(const abstract_type&, collection_mutation_view, collection_mutation_view);
|
||||
|
||||
// Serializes the given collection of cells to a sequence of bytes ready to be sent over the CQL protocol.
|
||||
bytes_ostream serialize_for_cql(const abstract_type&, collection_mutation_view, cql_serialization_format);
|
||||
bytes serialize_for_cql(const abstract_type&, collection_mutation_view, cql_serialization_format);
|
||||
|
||||
@@ -54,36 +54,6 @@ public:
|
||||
virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const = 0;
|
||||
};
|
||||
|
||||
/*
|
||||
* Computes token value of partition key and returns it as bytes.
|
||||
*
|
||||
* Should NOT be used (use token_column_computation), because ordering
|
||||
* of bytes is different than ordering of tokens (signed vs unsigned comparison).
|
||||
*
|
||||
* The type name stored for computations of this class is "token" - this was
|
||||
* the original implementation. (now depracated for new tables)
|
||||
*/
|
||||
class legacy_token_column_computation : public column_computation {
|
||||
public:
|
||||
virtual column_computation_ptr clone() const override {
|
||||
return std::make_unique<legacy_token_column_computation>(*this);
|
||||
}
|
||||
virtual bytes serialize() const override;
|
||||
virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const override;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Computes token value of partition key and returns it as long_type.
|
||||
* The return type means that it can be trivially sorted (for example
|
||||
* if computed column using this computation is a clustering key),
|
||||
* preserving the correct order of tokens (using signed comparisons).
|
||||
*
|
||||
* Please use this class instead of legacy_token_column_computation.
|
||||
*
|
||||
* The type name stored for computations of this class is "token_v2".
|
||||
* (the name "token" refers to the depracated legacy_token_column_computation)
|
||||
*/
|
||||
class token_column_computation : public column_computation {
|
||||
public:
|
||||
virtual column_computation_ptr clone() const override {
|
||||
|
||||
@@ -43,7 +43,7 @@ public:
|
||||
const ::schema& schema() const {
|
||||
return *_schema;
|
||||
}
|
||||
friend std::strong_ordering tri_compare(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
friend int tri_compare(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
return dht::ring_position_tri_compare(*x._schema, *x._rpv, *y._rpv);
|
||||
}
|
||||
friend bool operator<(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
@@ -83,7 +83,7 @@ public:
|
||||
const ::schema& schema() const {
|
||||
return *_schema;
|
||||
}
|
||||
friend std::strong_ordering tri_compare(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
friend int tri_compare(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
return dht::ring_position_tri_compare(*x._schema, *x._rp, *y._rp);
|
||||
}
|
||||
friend bool operator<(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
@@ -133,7 +133,7 @@ public:
|
||||
};
|
||||
return std::visit(rpv_accessor{}, *_crp_or_view);
|
||||
}
|
||||
friend std::strong_ordering tri_compare(const compatible_ring_position_or_view& x, const compatible_ring_position_or_view& y) {
|
||||
friend int tri_compare(const compatible_ring_position_or_view& x, const compatible_ring_position_or_view& y) {
|
||||
struct schema_accessor {
|
||||
const ::schema& operator()(const compatible_ring_position& crp) {
|
||||
return crp.schema();
|
||||
|
||||
108
compound.hh
108
compound.hh
@@ -73,19 +73,12 @@ private:
|
||||
* <len(value1)><value1><len(value2)><value2>...<len(value_n)><value_n>
|
||||
*
|
||||
*/
|
||||
template<typename RangeOfSerializedComponents, FragmentedMutableView Out>
|
||||
static void serialize_value(RangeOfSerializedComponents&& values, Out out) {
|
||||
template<typename RangeOfSerializedComponents, typename CharOutputIterator>
|
||||
static void serialize_value(RangeOfSerializedComponents&& values, CharOutputIterator& out) {
|
||||
for (auto&& val : values) {
|
||||
assert(val.size() <= std::numeric_limits<size_type>::max());
|
||||
write<size_type>(out, size_type(val.size()));
|
||||
using val_type = std::remove_cvref_t<decltype(val)>;
|
||||
if constexpr (FragmentedView<val_type>) {
|
||||
write_fragmented(out, val);
|
||||
} else if constexpr (std::same_as<val_type, managed_bytes>) {
|
||||
write_fragmented(out, managed_bytes_view(val));
|
||||
} else {
|
||||
write_fragmented(out, single_fragmented_view(val));
|
||||
}
|
||||
out = std::copy(val.begin(), val.end(), out);
|
||||
}
|
||||
}
|
||||
template <typename RangeOfSerializedComponents>
|
||||
@@ -97,27 +90,25 @@ private:
|
||||
return len;
|
||||
}
|
||||
public:
|
||||
managed_bytes serialize_single(managed_bytes&& v) const {
|
||||
return serialize_value({std::move(v)});
|
||||
}
|
||||
managed_bytes serialize_single(bytes&& v) const {
|
||||
bytes serialize_single(bytes&& v) const {
|
||||
return serialize_value({std::move(v)});
|
||||
}
|
||||
template<typename RangeOfSerializedComponents>
|
||||
static managed_bytes serialize_value(RangeOfSerializedComponents&& values) {
|
||||
static bytes serialize_value(RangeOfSerializedComponents&& values) {
|
||||
auto size = serialized_size(values);
|
||||
if (size > std::numeric_limits<size_type>::max()) {
|
||||
throw std::runtime_error(format("Key size too large: {:d} > {:d}", size, std::numeric_limits<size_type>::max()));
|
||||
}
|
||||
managed_bytes b(managed_bytes::initialized_later(), size);
|
||||
serialize_value(values, managed_bytes_mutable_view(b));
|
||||
bytes b(bytes::initialized_later(), size);
|
||||
auto i = b.begin();
|
||||
serialize_value(values, i);
|
||||
return b;
|
||||
}
|
||||
template<typename T>
|
||||
static managed_bytes serialize_value(std::initializer_list<T> values) {
|
||||
static bytes serialize_value(std::initializer_list<T> values) {
|
||||
return serialize_value(boost::make_iterator_range(values.begin(), values.end()));
|
||||
}
|
||||
managed_bytes serialize_optionals(const std::vector<bytes_opt>& values) const {
|
||||
bytes serialize_optionals(const std::vector<bytes_opt>& values) const {
|
||||
return serialize_value(values | boost::adaptors::transformed([] (const bytes_opt& bo) -> bytes_view {
|
||||
if (!bo) {
|
||||
throw std::logic_error("attempted to create key component from empty optional");
|
||||
@@ -125,7 +116,7 @@ public:
|
||||
return *bo;
|
||||
}));
|
||||
}
|
||||
managed_bytes serialize_value_deep(const std::vector<data_value>& values) const {
|
||||
bytes serialize_value_deep(const std::vector<data_value>& values) const {
|
||||
// TODO: Optimize
|
||||
std::vector<bytes> partial;
|
||||
partial.reserve(values.size());
|
||||
@@ -136,26 +127,19 @@ public:
|
||||
}
|
||||
return serialize_value(partial);
|
||||
}
|
||||
managed_bytes decompose_value(const value_type& values) const {
|
||||
bytes decompose_value(const value_type& values) const {
|
||||
return serialize_value(values);
|
||||
}
|
||||
class iterator {
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = const managed_bytes_view;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = const value_type*;
|
||||
using reference = const value_type&;
|
||||
class iterator : public std::iterator<std::input_iterator_tag, const bytes_view> {
|
||||
private:
|
||||
managed_bytes_view _v;
|
||||
managed_bytes_view _current;
|
||||
size_t _remaining = 0;
|
||||
bytes_view _v;
|
||||
bytes_view _current;
|
||||
private:
|
||||
void read_current() {
|
||||
_remaining = _v.size_bytes();
|
||||
size_type len;
|
||||
{
|
||||
if (_v.empty()) {
|
||||
_v = bytes_view(nullptr, 0);
|
||||
return;
|
||||
}
|
||||
len = read_simple<size_type>(_v);
|
||||
@@ -163,16 +147,15 @@ public:
|
||||
throw_with_backtrace<marshal_exception>(format("compound_type iterator - not enough bytes, expected {:d}, got {:d}", len, _v.size()));
|
||||
}
|
||||
}
|
||||
_current = _v.prefix(len);
|
||||
_v.remove_prefix(_current.size_bytes());
|
||||
_current = bytes_view(_v.begin(), len);
|
||||
_v.remove_prefix(len);
|
||||
}
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
iterator(const managed_bytes_view& v) : _v(v) {
|
||||
iterator(const bytes_view& v) : _v(v) {
|
||||
read_current();
|
||||
}
|
||||
iterator(end_iterator_tag, const managed_bytes_view& v) : _v() {}
|
||||
iterator() {}
|
||||
iterator(end_iterator_tag, const bytes_view& v) : _v(nullptr, 0) {}
|
||||
iterator& operator++() {
|
||||
read_current();
|
||||
return *this;
|
||||
@@ -184,40 +167,29 @@ public:
|
||||
}
|
||||
const value_type& operator*() const { return _current; }
|
||||
const value_type* operator->() const { return &_current; }
|
||||
bool operator==(const iterator& i) const { return _remaining == i._remaining; }
|
||||
bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); }
|
||||
bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); }
|
||||
};
|
||||
static iterator begin(managed_bytes_view v) {
|
||||
static iterator begin(const bytes_view& v) {
|
||||
return iterator(v);
|
||||
}
|
||||
static iterator end(managed_bytes_view v) {
|
||||
static iterator end(const bytes_view& v) {
|
||||
return iterator(typename iterator::end_iterator_tag(), v);
|
||||
}
|
||||
static boost::iterator_range<iterator> components(managed_bytes_view v) {
|
||||
static boost::iterator_range<iterator> components(const bytes_view& v) {
|
||||
return { begin(v), end(v) };
|
||||
}
|
||||
value_type deserialize_value(managed_bytes_view v) const {
|
||||
value_type deserialize_value(bytes_view v) const {
|
||||
std::vector<bytes> result;
|
||||
result.reserve(_types.size());
|
||||
std::transform(begin(v), end(v), std::back_inserter(result), [] (auto&& v) {
|
||||
return to_bytes(v);
|
||||
return bytes(v.begin(), v.end());
|
||||
});
|
||||
return result;
|
||||
}
|
||||
bool less(managed_bytes_view b1, managed_bytes_view b2) const {
|
||||
return with_linearized(b1, [&] (bytes_view bv1) {
|
||||
return with_linearized(b2, [&] (bytes_view bv2) {
|
||||
return less(bv1, bv2);
|
||||
});
|
||||
});
|
||||
}
|
||||
bool less(bytes_view b1, bytes_view b2) const {
|
||||
return compare(b1, b2) < 0;
|
||||
}
|
||||
size_t hash(managed_bytes_view v) const{
|
||||
return with_linearized(v, [&] (bytes_view v) {
|
||||
return hash(v);
|
||||
});
|
||||
}
|
||||
size_t hash(bytes_view v) const {
|
||||
if (_byte_order_equal) {
|
||||
return std::hash<bytes_view>()(v);
|
||||
@@ -230,13 +202,6 @@ public:
|
||||
}
|
||||
return h;
|
||||
}
|
||||
int compare(managed_bytes_view b1, managed_bytes_view b2) const {
|
||||
return with_linearized(b1, [&] (bytes_view bv1) {
|
||||
return with_linearized(b2, [&] (bytes_view bv2) {
|
||||
return compare(bv1, bv2);
|
||||
});
|
||||
});
|
||||
}
|
||||
int compare(bytes_view b1, bytes_view b2) const {
|
||||
if (_byte_order_comparable) {
|
||||
if (_is_reversed) {
|
||||
@@ -251,21 +216,15 @@ public:
|
||||
});
|
||||
}
|
||||
// Retruns true iff given prefix has no missing components
|
||||
bool is_full(managed_bytes_view v) const {
|
||||
bool is_full(bytes_view v) const {
|
||||
assert(AllowPrefixes == allow_prefixes::yes);
|
||||
return std::distance(begin(v), end(v)) == (ssize_t)_types.size();
|
||||
}
|
||||
bool is_empty(managed_bytes_view v) const {
|
||||
return v.empty();
|
||||
}
|
||||
bool is_empty(const managed_bytes& v) const {
|
||||
return v.empty();
|
||||
}
|
||||
bool is_empty(bytes_view v) const {
|
||||
return begin(v) == end(v);
|
||||
}
|
||||
void validate(managed_bytes_view v) const {
|
||||
std::vector<managed_bytes_view> values(begin(v), end(v));
|
||||
void validate(bytes_view v) const {
|
||||
std::vector<bytes_view> values(begin(v), end(v));
|
||||
if (AllowPrefixes == allow_prefixes::no && values.size() < _types.size()) {
|
||||
throw marshal_exception(fmt::format("compound::validate(): non-prefixable compound cannot be a prefix"));
|
||||
}
|
||||
@@ -278,13 +237,6 @@ public:
|
||||
_types[i]->validate(values[i], cql_serialization_format::internal());
|
||||
}
|
||||
}
|
||||
bool equal(managed_bytes_view v1, managed_bytes_view v2) const {
|
||||
return with_linearized(v1, [&] (bytes_view bv1) {
|
||||
return with_linearized(v2, [&] (bytes_view bv2) {
|
||||
return equal(bv1, bv2);
|
||||
});
|
||||
});
|
||||
}
|
||||
bool equal(bytes_view v1, bytes_view v2) const {
|
||||
if (_byte_order_equal) {
|
||||
return compare_unsigned(v1, v2) == 0;
|
||||
|
||||
@@ -54,21 +54,14 @@ template <typename CompoundType>
|
||||
class legacy_compound_view {
|
||||
static_assert(!CompoundType::is_prefixable, "Legacy view not defined for prefixes");
|
||||
CompoundType& _type;
|
||||
managed_bytes_view _packed;
|
||||
bytes_view _packed;
|
||||
public:
|
||||
legacy_compound_view(CompoundType& c, managed_bytes_view packed)
|
||||
legacy_compound_view(CompoundType& c, bytes_view packed)
|
||||
: _type(c)
|
||||
, _packed(packed)
|
||||
{ }
|
||||
|
||||
class iterator {
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = bytes::value_type;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = bytes::value_type*;
|
||||
using reference = bytes::value_type&;
|
||||
private:
|
||||
class iterator : public std::iterator<std::input_iterator_tag, bytes::value_type> {
|
||||
bool _singular;
|
||||
// Offset within virtual output space of a component.
|
||||
//
|
||||
@@ -147,18 +140,18 @@ public:
|
||||
{ }
|
||||
|
||||
// @k1 and @k2 must be serialized using @type, which was passed to the constructor.
|
||||
int operator()(managed_bytes_view k1, managed_bytes_view k2) const {
|
||||
int operator()(bytes_view k1, bytes_view k2) const {
|
||||
if (_type.is_singular()) {
|
||||
return compare_unsigned(*_type.begin(k1), *_type.begin(k2));
|
||||
}
|
||||
return lexicographical_tri_compare(
|
||||
_type.begin(k1), _type.end(k1),
|
||||
_type.begin(k2), _type.end(k2),
|
||||
[] (const managed_bytes_view& c1, const managed_bytes_view& c2) -> int {
|
||||
[] (const bytes_view& c1, const bytes_view& c2) -> int {
|
||||
if (c1.size() != c2.size() || !c1.size()) {
|
||||
return c1.size() < c2.size() ? -1 : c1.size() ? 1 : 0;
|
||||
}
|
||||
return compare_unsigned(c1, c2);
|
||||
return memcmp(c1.begin(), c2.begin(), c1.size());
|
||||
});
|
||||
}
|
||||
};
|
||||
@@ -188,7 +181,7 @@ public:
|
||||
// @packed is assumed to be serialized using supplied @type.
|
||||
template <typename CompoundType>
|
||||
static inline
|
||||
bytes to_legacy(CompoundType& type, managed_bytes_view packed) {
|
||||
bytes to_legacy(CompoundType& type, bytes_view packed) {
|
||||
legacy_compound_view<CompoundType> lv(type, packed);
|
||||
bytes legacy_form(bytes::initialized_later(), lv.size());
|
||||
std::copy(lv.begin(), lv.end(), legacy_form.begin());
|
||||
@@ -264,12 +257,6 @@ private:
|
||||
static void write_value(Value&& val, CharOutputIterator& out) {
|
||||
out = std::copy(val.begin(), val.end(), out);
|
||||
}
|
||||
template<typename CharOutputIterator>
|
||||
static void write_value(managed_bytes_view val, CharOutputIterator& out) {
|
||||
for (bytes_view frag : fragment_range(val)) {
|
||||
out = std::copy(frag.begin(), frag.end(), out);
|
||||
}
|
||||
}
|
||||
template <typename CharOutputIterator>
|
||||
static void write_value(const data_value& val, CharOutputIterator& out) {
|
||||
val.serialize(out);
|
||||
@@ -352,14 +339,7 @@ public:
|
||||
return eoc_byte == 0 ? eoc::none : (eoc_byte < 0 ? eoc::start : eoc::end);
|
||||
}
|
||||
|
||||
class iterator {
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = const component_view;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = const component_view*;
|
||||
using reference = const component_view&;
|
||||
private:
|
||||
class iterator : public std::iterator<std::input_iterator_tag, const component_view> {
|
||||
bytes_view _v;
|
||||
component_view _current;
|
||||
bool _strict_mode = true;
|
||||
@@ -411,7 +391,6 @@ public:
|
||||
iterator(end_iterator_tag) : _v(nullptr, 0) {}
|
||||
|
||||
public:
|
||||
iterator() : iterator(end_iterator_tag()) {}
|
||||
iterator& operator++() {
|
||||
read_current();
|
||||
return *this;
|
||||
|
||||
@@ -99,8 +99,8 @@ listen_address: localhost
|
||||
# listen_on_broadcast_address: false
|
||||
|
||||
# port for the CQL native transport to listen for clients on
|
||||
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
|
||||
# To disable the CQL native transport, remove this option and configure native_transport_port_ssl.
|
||||
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
|
||||
# To disable the CQL native transport, set this option to 0.
|
||||
native_transport_port: 9042
|
||||
|
||||
# Like native_transport_port, but clients are forwarded to specific shards, based on the
|
||||
@@ -230,9 +230,6 @@ batch_size_fail_threshold_in_kb: 50
|
||||
# - PasswordAuthenticator relies on username/password pairs to authenticate
|
||||
# users. It keeps usernames and hashed passwords in system_auth.credentials table.
|
||||
# Please increase system_auth keyspace replication factor if you use this authenticator.
|
||||
# - com.scylladb.auth.TransitionalAuthenticator requires username/password pair
|
||||
# to authenticate in the same manner as PasswordAuthenticator, but improper credentials
|
||||
# result in being logged in as an anonymous user. Use for upgrading clusters' auth.
|
||||
# authenticator: AllowAllAuthenticator
|
||||
|
||||
# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions
|
||||
@@ -242,9 +239,6 @@ batch_size_fail_threshold_in_kb: 50
|
||||
# - AllowAllAuthorizer allows any action to any user - set it to disable authorization.
|
||||
# - CassandraAuthorizer stores permissions in system_auth.permissions table. Please
|
||||
# increase system_auth keyspace replication factor if you use this authorizer.
|
||||
# - com.scylladb.auth.TransitionalAuthorizer wraps around the CassandraAuthorizer, using it for
|
||||
# authorizing permission management. Otherwise, it allows all. Use for upgrading
|
||||
# clusters' auth.
|
||||
# authorizer: AllowAllAuthorizer
|
||||
|
||||
# initial_token allows you to specify tokens manually. While you can use # it with
|
||||
|
||||
317
configure.py
317
configure.py
@@ -59,9 +59,6 @@ i18n_xlat = {
|
||||
}
|
||||
|
||||
python3_dependencies = subprocess.run('./install-dependencies.sh --print-python3-runtime-packages', shell=True, capture_output=True, encoding='utf-8').stdout.strip()
|
||||
node_exporter_filename = subprocess.run('./install-dependencies.sh --print-node-exporter-filename', shell=True, capture_output=True, encoding='utf-8').stdout.strip()
|
||||
node_exporter_dirname = os.path.basename(node_exporter_filename).rstrip('.tar.gz')
|
||||
|
||||
|
||||
def pkgname(name):
|
||||
if name in i18n_xlat:
|
||||
@@ -126,21 +123,18 @@ def ensure_tmp_dir_exists():
|
||||
os.makedirs(tempfile.tempdir)
|
||||
|
||||
|
||||
def try_compile_and_link(compiler, source='', flags=[], verbose=False):
|
||||
def try_compile_and_link(compiler, source='', flags=[]):
|
||||
ensure_tmp_dir_exists()
|
||||
with tempfile.NamedTemporaryFile() as sfile:
|
||||
ofile = tempfile.mktemp()
|
||||
try:
|
||||
sfile.file.write(bytes(source, 'utf-8'))
|
||||
sfile.file.flush()
|
||||
ret = subprocess.run([compiler, '-x', 'c++', '-o', ofile, sfile.name] + args.user_cflags.split() + flags,
|
||||
capture_output=True)
|
||||
if verbose:
|
||||
print(f"Compilation failed: {compiler} -x c++ -o {ofile} {sfile.name} {args.user_cflags} {flags}")
|
||||
print(source)
|
||||
print(ret.stdout.decode('utf-8'))
|
||||
print(ret.stderr.decode('utf-8'))
|
||||
return ret.returncode == 0
|
||||
# We can't write to /dev/null, since in some cases (-ftest-coverage) gcc will create an auxiliary
|
||||
# output file based on the name of the output file, and "/dev/null.gcsa" is not a good name
|
||||
return subprocess.call([compiler, '-x', 'c++', '-o', ofile, sfile.name] + args.user_cflags.split() + flags,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL) == 0
|
||||
finally:
|
||||
if os.path.exists(ofile):
|
||||
os.unlink(ofile)
|
||||
@@ -167,21 +161,7 @@ def linker_flags(compiler):
|
||||
link_flags.append(threads_flag)
|
||||
return ' '.join(link_flags)
|
||||
else:
|
||||
linker = ''
|
||||
try:
|
||||
subprocess.call(["gold", "-v"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
linker = 'gold'
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
subprocess.call(["lld", "-v"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
linker = 'lld'
|
||||
except:
|
||||
pass
|
||||
if linker:
|
||||
print(f'Linker {linker} found, but the compilation attempt failed, defaulting to default system linker')
|
||||
else:
|
||||
print('Note: neither lld nor gold found; using default system linker')
|
||||
print('Note: neither lld nor gold found; using default system linker')
|
||||
return ''
|
||||
|
||||
|
||||
@@ -275,25 +255,21 @@ modes = {
|
||||
'cxxflags': '-DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
|
||||
'cxx_ld_flags': '',
|
||||
'stack-usage-threshold': 1024*40,
|
||||
'optimization-level': 'g',
|
||||
},
|
||||
'release': {
|
||||
'cxxflags': '-ffunction-sections -fdata-sections ',
|
||||
'cxx_ld_flags': '-Wl,--gc-sections',
|
||||
'cxxflags': '',
|
||||
'cxx_ld_flags': '-O3 -ffunction-sections -fdata-sections -Wl,--gc-sections',
|
||||
'stack-usage-threshold': 1024*13,
|
||||
'optimization-level': '3',
|
||||
},
|
||||
'dev': {
|
||||
'cxxflags': '-DDEVEL -DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
|
||||
'cxx_ld_flags': '',
|
||||
'cxxflags': '-DSEASTAR_ENABLE_ALLOC_FAILURE_INJECTION -DSCYLLA_ENABLE_ERROR_INJECTION',
|
||||
'cxx_ld_flags': '-O1',
|
||||
'stack-usage-threshold': 1024*21,
|
||||
'optimization-level': '2',
|
||||
},
|
||||
'sanitize': {
|
||||
'cxxflags': '-DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
|
||||
'cxx_ld_flags': '',
|
||||
'cxx_ld_flags': '-Os',
|
||||
'stack-usage-threshold': 1024*50,
|
||||
'optimization-level': 's',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -302,7 +278,7 @@ scylla_tests = set([
|
||||
'test/boost/cdc_generation_test',
|
||||
'test/boost/aggregate_fcts_test',
|
||||
'test/boost/allocation_strategy_test',
|
||||
'test/boost/alternator_unit_test',
|
||||
'test/boost/alternator_base64_test',
|
||||
'test/boost/anchorless_list_test',
|
||||
'test/boost/auth_passwords_test',
|
||||
'test/boost/auth_resource_test',
|
||||
@@ -339,7 +315,6 @@ scylla_tests = set([
|
||||
'test/boost/crc_test',
|
||||
'test/boost/data_listeners_test',
|
||||
'test/boost/database_test',
|
||||
'test/boost/double_decker_test',
|
||||
'test/boost/duration_test',
|
||||
'test/boost/dynamic_bitset_test',
|
||||
'test/boost/enum_option_test',
|
||||
@@ -354,7 +329,6 @@ scylla_tests = set([
|
||||
'test/boost/gossip_test',
|
||||
'test/boost/gossiping_property_file_snitch_test',
|
||||
'test/boost/hash_test',
|
||||
'test/boost/hashers_test',
|
||||
'test/boost/idl_test',
|
||||
'test/boost/input_stream_test',
|
||||
'test/boost/json_cql_query_test',
|
||||
@@ -369,10 +343,10 @@ scylla_tests = set([
|
||||
'test/boost/estimated_histogram_test',
|
||||
'test/boost/logalloc_test',
|
||||
'test/boost/managed_vector_test',
|
||||
'test/boost/managed_bytes_test',
|
||||
'test/boost/intrusive_array_test',
|
||||
'test/boost/map_difference_test',
|
||||
'test/boost/memtable_test',
|
||||
'test/boost/meta_test',
|
||||
'test/boost/multishard_mutation_query_test',
|
||||
'test/boost/murmur_hash_test',
|
||||
'test/boost/mutation_fragment_test',
|
||||
@@ -397,7 +371,6 @@ scylla_tests = set([
|
||||
'test/boost/schema_change_test',
|
||||
'test/boost/schema_registry_test',
|
||||
'test/boost/secondary_index_test',
|
||||
'test/boost/tracing',
|
||||
'test/boost/index_with_paging_test',
|
||||
'test/boost/serialization_test',
|
||||
'test/boost/serialized_action_test',
|
||||
@@ -411,8 +384,6 @@ scylla_tests = set([
|
||||
'test/boost/sstable_resharding_test',
|
||||
'test/boost/sstable_directory_test',
|
||||
'test/boost/sstable_test',
|
||||
'test/boost/sstable_move_test',
|
||||
'test/boost/statement_restrictions_test',
|
||||
'test/boost/storage_proxy_test',
|
||||
'test/boost/top_k_test',
|
||||
'test/boost/transport_test',
|
||||
@@ -428,13 +399,9 @@ scylla_tests = set([
|
||||
'test/boost/vint_serialization_test',
|
||||
'test/boost/virtual_reader_test',
|
||||
'test/boost/bptree_test',
|
||||
'test/boost/btree_test',
|
||||
'test/boost/radix_tree_test',
|
||||
'test/boost/double_decker_test',
|
||||
'test/boost/stall_free_test',
|
||||
'test/boost/raft_address_map_test',
|
||||
'test/boost/raft_sys_table_storage_test',
|
||||
'test/boost/sstable_set_test',
|
||||
'test/boost/imr_test',
|
||||
'test/manual/ec2_snitch_test',
|
||||
'test/manual/enormous_table_scan_test',
|
||||
'test/manual/gce_snitch_test',
|
||||
@@ -451,9 +418,8 @@ scylla_tests = set([
|
||||
'test/perf/perf_fast_forward',
|
||||
'test/perf/perf_hash',
|
||||
'test/perf/perf_mutation',
|
||||
'test/perf/perf_collection',
|
||||
'test/perf/perf_bptree',
|
||||
'test/perf/perf_row_cache_update',
|
||||
'test/perf/perf_row_cache_reads',
|
||||
'test/perf/perf_simple_query',
|
||||
'test/perf/perf_sstable',
|
||||
'test/unit/lsa_async_eviction_test',
|
||||
@@ -461,11 +427,7 @@ scylla_tests = set([
|
||||
'test/unit/row_cache_alloc_stress_test',
|
||||
'test/unit/row_cache_stress_test',
|
||||
'test/unit/bptree_stress_test',
|
||||
'test/unit/btree_stress_test',
|
||||
'test/unit/bptree_compaction_test',
|
||||
'test/unit/btree_compaction_test',
|
||||
'test/unit/radix_tree_stress_test',
|
||||
'test/unit/radix_tree_compaction_test',
|
||||
])
|
||||
|
||||
perf_tests = set([
|
||||
@@ -479,15 +441,13 @@ perf_tests = set([
|
||||
|
||||
raft_tests = set([
|
||||
'test/raft/replication_test',
|
||||
'test/raft/fsm_test',
|
||||
'test/raft/etcd_test',
|
||||
'test/boost/raft_fsm_test',
|
||||
])
|
||||
|
||||
apps = set([
|
||||
'scylla',
|
||||
'test/tools/cql_repl',
|
||||
'tools/scylla-types',
|
||||
'tools/scylla-sstable-index',
|
||||
])
|
||||
|
||||
tests = scylla_tests | perf_tests | raft_tests
|
||||
@@ -517,9 +477,9 @@ arg_parser.add_argument('--ldflags', action='store', dest='user_ldflags', defaul
|
||||
help='Extra flags for the linker')
|
||||
arg_parser.add_argument('--target', action='store', dest='target', default=default_target_arch(),
|
||||
help='Target architecture (-march)')
|
||||
arg_parser.add_argument('--compiler', action='store', dest='cxx', default='clang++',
|
||||
arg_parser.add_argument('--compiler', action='store', dest='cxx', default='g++',
|
||||
help='C++ compiler path')
|
||||
arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='clang',
|
||||
arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='gcc',
|
||||
help='C compiler path')
|
||||
add_tristate(arg_parser, name='dpdk', dest='dpdk',
|
||||
help='Use dpdk (from seastar dpdk sources) (default=True for release builds)')
|
||||
@@ -527,8 +487,6 @@ arg_parser.add_argument('--dpdk-target', action='store', dest='dpdk_target', def
|
||||
help='Path to DPDK SDK target location (e.g. <DPDK SDK dir>/x86_64-native-linuxapp-gcc)')
|
||||
arg_parser.add_argument('--debuginfo', action='store', dest='debuginfo', type=int, default=1,
|
||||
help='Enable(1)/disable(0)compiler debug information generation')
|
||||
arg_parser.add_argument('--optimization-level', action='append', dest='mode_o_levels', metavar='MODE=LEVEL', default=[],
|
||||
help=f'Override default compiler optimization level for mode (defaults: {" ".join([x+"="+modes[x]["optimization-level"] for x in modes])})')
|
||||
arg_parser.add_argument('--static-stdc++', dest='staticcxx', action='store_true',
|
||||
help='Link libgcc and libstdc++ statically')
|
||||
arg_parser.add_argument('--static-thrift', dest='staticthrift', action='store_true',
|
||||
@@ -551,34 +509,36 @@ arg_parser.add_argument('--with-antlr3', dest='antlr3_exec', action='store', def
|
||||
help='path to antlr3 executable')
|
||||
arg_parser.add_argument('--with-ragel', dest='ragel_exec', action='store', default='ragel',
|
||||
help='path to ragel executable')
|
||||
arg_parser.add_argument('--build-raft', dest='build_raft', action='store_true', default=False,
|
||||
help='build raft code')
|
||||
add_tristate(arg_parser, name='stack-guards', dest='stack_guards', help='Use stack guards')
|
||||
arg_parser.add_argument('--verbose', dest='verbose', action='store_true',
|
||||
help='Make configure.py output more verbose (useful for debugging the build process itself)')
|
||||
arg_parser.add_argument('--test-repeat', dest='test_repeat', action='store', type=str, default='1',
|
||||
help='Set number of times to repeat each unittest.')
|
||||
arg_parser.add_argument('--test-timeout', dest='test_timeout', action='store', type=str, default='7200')
|
||||
arg_parser.add_argument('--clang-inline-threshold', action='store', type=int, dest='clang_inline_threshold', default=-1,
|
||||
help="LLVM-specific inline threshold compilation parameter")
|
||||
args = arg_parser.parse_args()
|
||||
|
||||
coroutines_test_src = '''
|
||||
#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
|
||||
#if GCC_VERSION < 100201
|
||||
#error "Coroutines support requires at leat gcc 10.2.1"
|
||||
#endif
|
||||
'''
|
||||
compiler_supports_coroutines = try_compile(compiler=args.cxx, source=coroutines_test_src)
|
||||
|
||||
if args.build_raft and not compiler_supports_coroutines:
|
||||
raise Exception("--build-raft is requested, while the used compiler does not support coroutines")
|
||||
|
||||
if not args.build_raft:
|
||||
all_artifacts.difference_update(raft_tests)
|
||||
tests.difference_update(raft_tests)
|
||||
|
||||
defines = ['XXH_PRIVATE_API',
|
||||
'SEASTAR_TESTING_MAIN',
|
||||
]
|
||||
|
||||
extra_cxxflags = {
|
||||
'debug': {},
|
||||
'dev': {},
|
||||
'release': {},
|
||||
'sanitize': {}
|
||||
}
|
||||
|
||||
scylla_raft_core = [
|
||||
'raft/raft.cc',
|
||||
'raft/server.cc',
|
||||
'raft/fsm.cc',
|
||||
'raft/tracker.cc',
|
||||
'raft/log.cc',
|
||||
]
|
||||
extra_cxxflags = {}
|
||||
|
||||
scylla_core = (['database.cc',
|
||||
'absl-flat_hash_map.cc',
|
||||
@@ -621,16 +581,14 @@ scylla_core = (['database.cc',
|
||||
'counters.cc',
|
||||
'compress.cc',
|
||||
'zstd.cc',
|
||||
'sstables/mp_row_consumer.cc',
|
||||
'sstables/sstables.cc',
|
||||
'sstables/sstables_manager.cc',
|
||||
'sstables/sstable_set.cc',
|
||||
'sstables/mx/reader.cc',
|
||||
'sstables/mx/writer.cc',
|
||||
'sstables/kl/reader.cc',
|
||||
'sstables/kl/writer.cc',
|
||||
'sstables/sstable_version.cc',
|
||||
'sstables/compress.cc',
|
||||
'sstables/sstable_mutation_reader.cc',
|
||||
'sstables/partition.cc',
|
||||
'sstables/compaction.cc',
|
||||
'sstables/compaction_strategy.cc',
|
||||
'sstables/size_tiered_compaction_strategy.cc',
|
||||
@@ -769,7 +727,6 @@ scylla_core = (['database.cc',
|
||||
'db/data_listeners.cc',
|
||||
'db/hints/manager.cc',
|
||||
'db/hints/resource_manager.cc',
|
||||
'db/hints/host_filter.cc',
|
||||
'db/config.cc',
|
||||
'db/extensions.cc',
|
||||
'db/heat_load_balance.cc',
|
||||
@@ -887,6 +844,7 @@ scylla_core = (['database.cc',
|
||||
'vint-serialization.cc',
|
||||
'utils/arch/powerpc/crc32-vpmsum/crc32_wrapper.cc',
|
||||
'querier.cc',
|
||||
'data/cell.cc',
|
||||
'mutation_writer/multishard_writer.cc',
|
||||
'multishard_mutation_query.cc',
|
||||
'reader_concurrency_semaphore.cc',
|
||||
@@ -899,14 +857,7 @@ scylla_core = (['database.cc',
|
||||
'mutation_writer/shard_based_splitting_writer.cc',
|
||||
'mutation_writer/feed_writers.cc',
|
||||
'lua.cc',
|
||||
'service/raft/schema_raft_state_machine.cc',
|
||||
'service/raft/raft_sys_table_storage.cc',
|
||||
'serializer.cc',
|
||||
'service/raft/raft_rpc.cc',
|
||||
'service/raft/raft_gossip_failure_detector.cc',
|
||||
'service/raft/raft_services.cc',
|
||||
] + [Antlr3Grammar('cql3/Cql.g')] + [Thrift('interface/cassandra.thrift', 'Cassandra')] \
|
||||
+ scylla_raft_core
|
||||
] + [Antlr3Grammar('cql3/Cql.g')] + [Thrift('interface/cassandra.thrift', 'Cassandra')]
|
||||
)
|
||||
|
||||
api = ['api/api.cc',
|
||||
@@ -1002,7 +953,6 @@ idls = ['idl/gossip_digest.idl.hh',
|
||||
'idl/view.idl.hh',
|
||||
'idl/messaging_service.idl.hh',
|
||||
'idl/paxos.idl.hh',
|
||||
'idl/raft.idl.hh',
|
||||
]
|
||||
|
||||
headers = find_headers('.', excluded_dirs=['idl', 'build', 'seastar', '.git'])
|
||||
@@ -1027,14 +977,20 @@ scylla_tests_dependencies = scylla_core + idls + scylla_tests_generic_dependenci
|
||||
'test/lib/random_schema.cc',
|
||||
]
|
||||
|
||||
scylla_raft_dependencies = scylla_raft_core + ['utils/uuid.cc']
|
||||
scylla_raft_dependencies = [
|
||||
'raft/raft.cc',
|
||||
'raft/server.cc',
|
||||
'raft/fsm.cc',
|
||||
'raft/progress.cc',
|
||||
'raft/log.cc',
|
||||
'utils/uuid.cc'
|
||||
]
|
||||
|
||||
deps = {
|
||||
'scylla': idls + ['main.cc', 'release.cc', 'utils/build_id.cc'] + scylla_core + api + alternator + redis,
|
||||
'test/tools/cql_repl': idls + ['test/tools/cql_repl.cc'] + scylla_core + scylla_tests_generic_dependencies,
|
||||
#FIXME: we don't need all of scylla_core here, only the types module, need to modularize scylla_core.
|
||||
'tools/scylla-types': idls + ['tools/scylla-types.cc'] + scylla_core,
|
||||
'tools/scylla-sstable-index': idls + ['tools/scylla-sstable-index.cc'] + scylla_core,
|
||||
}
|
||||
|
||||
pure_boost_tests = set([
|
||||
@@ -1054,13 +1010,13 @@ pure_boost_tests = set([
|
||||
'test/boost/dynamic_bitset_test',
|
||||
'test/boost/enum_option_test',
|
||||
'test/boost/enum_set_test',
|
||||
'test/boost/hashers_test',
|
||||
'test/boost/idl_test',
|
||||
'test/boost/json_test',
|
||||
'test/boost/keys_test',
|
||||
'test/boost/like_matcher_test',
|
||||
'test/boost/linearizing_input_stream_test',
|
||||
'test/boost/map_difference_test',
|
||||
'test/boost/meta_test',
|
||||
'test/boost/nonwrapping_range_test',
|
||||
'test/boost/observable_test',
|
||||
'test/boost/range_test',
|
||||
@@ -1070,13 +1026,11 @@ pure_boost_tests = set([
|
||||
'test/boost/top_k_test',
|
||||
'test/boost/vint_serialization_test',
|
||||
'test/boost/bptree_test',
|
||||
'test/boost/utf8_test',
|
||||
'test/boost/btree_test',
|
||||
'test/manual/streaming_histogram_test',
|
||||
])
|
||||
|
||||
tests_not_using_seastar_test_framework = set([
|
||||
'test/boost/alternator_unit_test',
|
||||
'test/boost/alternator_base64_test',
|
||||
'test/boost/small_vector_test',
|
||||
'test/manual/gossip',
|
||||
'test/manual/message',
|
||||
@@ -1085,17 +1039,13 @@ tests_not_using_seastar_test_framework = set([
|
||||
'test/perf/perf_cql_parser',
|
||||
'test/perf/perf_hash',
|
||||
'test/perf/perf_mutation',
|
||||
'test/perf/perf_collection',
|
||||
'test/perf/perf_bptree',
|
||||
'test/perf/perf_row_cache_update',
|
||||
'test/unit/lsa_async_eviction_test',
|
||||
'test/unit/lsa_sync_eviction_test',
|
||||
'test/unit/row_cache_alloc_stress_test',
|
||||
'test/unit/bptree_stress_test',
|
||||
'test/unit/btree_stress_test',
|
||||
'test/unit/bptree_compaction_test',
|
||||
'test/unit/btree_compaction_test',
|
||||
'test/unit/radix_tree_stress_test',
|
||||
'test/unit/radix_tree_compaction_test',
|
||||
'test/manual/sstable_scan_footprint_test',
|
||||
]) | pure_boost_tests
|
||||
|
||||
@@ -1138,6 +1088,8 @@ deps['test/boost/estimated_histogram_test'] = ['test/boost/estimated_histogram_t
|
||||
deps['test/boost/anchorless_list_test'] = ['test/boost/anchorless_list_test.cc']
|
||||
deps['test/perf/perf_fast_forward'] += ['release.cc']
|
||||
deps['test/perf/perf_simple_query'] += ['release.cc']
|
||||
deps['test/boost/meta_test'] = ['test/boost/meta_test.cc']
|
||||
deps['test/boost/imr_test'] = ['test/boost/imr_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
|
||||
deps['test/boost/reusable_buffer_test'] = [
|
||||
"test/boost/reusable_buffer_test.cc",
|
||||
"test/lib/log.cc",
|
||||
@@ -1152,11 +1104,10 @@ deps['test/boost/linearizing_input_stream_test'] = [
|
||||
]
|
||||
|
||||
deps['test/boost/duration_test'] += ['test/lib/exception_utils.cc']
|
||||
deps['test/boost/alternator_unit_test'] += ['alternator/base64.cc']
|
||||
deps['test/boost/alternator_base64_test'] += ['alternator/base64.cc']
|
||||
|
||||
deps['test/raft/replication_test'] = ['test/raft/replication_test.cc'] + scylla_raft_dependencies
|
||||
deps['test/raft/fsm_test'] = ['test/raft/fsm_test.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
|
||||
deps['test/raft/etcd_test'] = ['test/raft/etcd_test.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
|
||||
deps['test/boost/raft_fsm_test'] = ['test/boost/raft_fsm_test.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
|
||||
|
||||
deps['utils/gz/gen_crc_combine_table'] = ['utils/gz/gen_crc_combine_table.cc']
|
||||
|
||||
@@ -1197,13 +1148,12 @@ warnings = [
|
||||
'-Wno-delete-non-abstract-non-virtual-dtor',
|
||||
'-Wno-unknown-attributes',
|
||||
'-Wno-braced-scalar-init',
|
||||
'-Wno-unused-value',
|
||||
'-Wno-range-loop-construct',
|
||||
'-Wno-unused-function',
|
||||
'-Wno-implicit-int-float-conversion',
|
||||
'-Wno-delete-abstract-non-virtual-dtor',
|
||||
'-Wno-uninitialized-const-reference',
|
||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77728
|
||||
'-Wno-psabi',
|
||||
]
|
||||
|
||||
warnings = [w
|
||||
@@ -1212,36 +1162,18 @@ warnings = [w
|
||||
|
||||
warnings = ' '.join(warnings + ['-Wno-error=deprecated-declarations'])
|
||||
|
||||
def clang_inline_threshold():
|
||||
if args.clang_inline_threshold != -1:
|
||||
return args.clang_inline_threshold
|
||||
elif platform.machine() == 'aarch64':
|
||||
# we see miscompiles with 1200 and above with format("{}", uuid)
|
||||
return 600
|
||||
else:
|
||||
return 2500
|
||||
|
||||
optimization_flags = [
|
||||
'--param inline-unit-growth=300', # gcc
|
||||
f'-mllvm -inline-threshold={clang_inline_threshold()}', # clang
|
||||
'-mllvm -inline-threshold=2500', # clang
|
||||
]
|
||||
optimization_flags = [o
|
||||
for o in optimization_flags
|
||||
if flag_supported(flag=o, compiler=args.cxx)]
|
||||
modes['release']['cxxflags'] += ' ' + ' '.join(optimization_flags)
|
||||
modes['release']['cxx_ld_flags'] += ' ' + ' '.join(optimization_flags)
|
||||
|
||||
if flag_supported(flag='-Wstack-usage=4096', compiler=args.cxx):
|
||||
for mode in modes:
|
||||
modes[mode]['cxxflags'] += f' -Wstack-usage={modes[mode]["stack-usage-threshold"]} -Wno-error=stack-usage='
|
||||
|
||||
for mode_level in args.mode_o_levels:
|
||||
( mode, level ) = mode_level.split('=', 2)
|
||||
if mode not in modes:
|
||||
raise Exception(f'Mode {mode} is missing, cannot configure optimization level for it')
|
||||
modes[mode]['optimization-level'] = level
|
||||
|
||||
for mode in modes:
|
||||
modes[mode]['cxxflags'] += f' -O{modes[mode]["optimization-level"]}'
|
||||
modes[mode]['cxx_ld_flags'] += f' -Wstack-usage={modes[mode]["stack-usage-threshold"]} -Wno-error=stack-usage='
|
||||
|
||||
linker_flags = linker_flags(compiler=args.cxx)
|
||||
|
||||
@@ -1308,8 +1240,7 @@ compiler_test_src = '''
|
||||
int main() { return 0; }
|
||||
'''
|
||||
if not try_compile_and_link(compiler=args.cxx, source=compiler_test_src):
|
||||
try_compile_and_link(compiler=args.cxx, source=compiler_test_src, verbose=True)
|
||||
print('Wrong compiler version or incorrect flags. Scylla needs GCC >= 10.1.1 with coroutines (-fcoroutines) or clang >= 10.0.0 to compile.')
|
||||
print('Wrong GCC version. Scylla needs GCC >= 10.1.1 to compile.')
|
||||
sys.exit(1)
|
||||
|
||||
if not try_compile(compiler=args.cxx, source='#include <boost/version.hpp>'):
|
||||
@@ -1357,12 +1288,8 @@ file = open(f'{outdir}/SCYLLA-VERSION-FILE', 'r')
|
||||
scylla_version = file.read().strip()
|
||||
file = open(f'{outdir}/SCYLLA-RELEASE-FILE', 'r')
|
||||
scylla_release = file.read().strip()
|
||||
file = open(f'{outdir}/SCYLLA-PRODUCT-FILE', 'r')
|
||||
scylla_product = file.read().strip()
|
||||
|
||||
for m in ['debug', 'release', 'sanitize', 'dev']:
|
||||
cxxflags = "-DSCYLLA_VERSION=\"\\\"" + scylla_version + "\\\"\" -DSCYLLA_RELEASE=\"\\\"" + scylla_release + "\\\"\" -DSCYLLA_BUILD_MODE=\"\\\"" + m + "\\\"\""
|
||||
extra_cxxflags[m]["release.cc"] = cxxflags
|
||||
extra_cxxflags["release.cc"] = "-DSCYLLA_VERSION=\"\\\"" + scylla_version + "\\\"\" -DSCYLLA_RELEASE=\"\\\"" + scylla_release + "\\\"\""
|
||||
|
||||
for m in ['debug', 'release', 'sanitize']:
|
||||
modes[m]['cxxflags'] += ' ' + dbgflag
|
||||
@@ -1402,6 +1329,9 @@ args.user_cflags += f" -ffile-prefix-map={curdir}=."
|
||||
|
||||
seastar_cflags = args.user_cflags
|
||||
|
||||
if build_raft:
|
||||
seastar_cflags += ' -fcoroutines'
|
||||
|
||||
if args.target != '':
|
||||
seastar_cflags += ' -march=' + args.target
|
||||
seastar_ldflags = args.user_ldflags
|
||||
@@ -1410,13 +1340,6 @@ libdeflate_cflags = seastar_cflags
|
||||
|
||||
MODE_TO_CMAKE_BUILD_TYPE = {'release' : 'RelWithDebInfo', 'debug' : 'Debug', 'dev' : 'Dev', 'sanitize' : 'Sanitize' }
|
||||
|
||||
# cmake likes to separate things with semicolons
|
||||
def semicolon_separated(*flags):
|
||||
# original flags may be space separated, so convert to string still
|
||||
# using spaces
|
||||
f = ' '.join(flags)
|
||||
return re.sub(' +', ';', f)
|
||||
|
||||
def configure_seastar(build_dir, mode):
|
||||
seastar_build_dir = os.path.join(build_dir, mode, 'seastar')
|
||||
|
||||
@@ -1425,8 +1348,8 @@ def configure_seastar(build_dir, mode):
|
||||
'-DCMAKE_C_COMPILER={}'.format(args.cc),
|
||||
'-DCMAKE_CXX_COMPILER={}'.format(args.cxx),
|
||||
'-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON',
|
||||
'-DSeastar_CXX_FLAGS={}'.format((seastar_cflags).replace(' ', ';')),
|
||||
'-DSeastar_LD_FLAGS={}'.format(semicolon_separated(seastar_ldflags, modes[mode]['cxx_ld_flags'])),
|
||||
'-DSeastar_CXX_FLAGS={}'.format((seastar_cflags + ' ' + modes[mode]['cxx_ld_flags']).replace(' ', ';')),
|
||||
'-DSeastar_LD_FLAGS={}'.format(seastar_ldflags),
|
||||
'-DSeastar_CXX_DIALECT=gnu++20',
|
||||
'-DSeastar_API_LEVEL=6',
|
||||
'-DSeastar_UNUSED_RESULT_ERROR=ON',
|
||||
@@ -1516,7 +1439,6 @@ abseil_libs = ['absl/' + lib for lib in [
|
||||
'numeric/libabsl_int128.a',
|
||||
'hash/libabsl_city.a',
|
||||
'hash/libabsl_hash.a',
|
||||
'hash/libabsl_wyhash.a',
|
||||
'base/libabsl_malloc_internal.a',
|
||||
'base/libabsl_spinlock_wait.a',
|
||||
'base/libabsl_base.a',
|
||||
@@ -1537,6 +1459,9 @@ libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-latomic', '-l
|
||||
if not args.staticboost:
|
||||
args.user_cflags += ' -DBOOST_TEST_DYN_LINK'
|
||||
|
||||
if build_raft:
|
||||
args.user_cflags += ' -DENABLE_SCYLLA_RAFT -fcoroutines'
|
||||
|
||||
# thrift version detection, see #4538
|
||||
proc_res = subprocess.run(["thrift", "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
proc_res_output = proc_res.stdout.decode("utf-8")
|
||||
@@ -1817,8 +1742,8 @@ with open(buildfile_tmp, 'w') as f:
|
||||
for obj in compiles:
|
||||
src = compiles[obj]
|
||||
f.write('build {}: cxx.{} {} || {} {}\n'.format(obj, mode, src, seastar_dep, gen_headers_dep))
|
||||
if src in extra_cxxflags[mode]:
|
||||
f.write(' cxxflags = {seastar_cflags} $cxxflags $cxxflags_{mode} {extra_cxxflags}\n'.format(mode=mode, extra_cxxflags=extra_cxxflags[mode][src], **modeval))
|
||||
if src in extra_cxxflags:
|
||||
f.write(' cxxflags = {seastar_cflags} $cxxflags $cxxflags_{mode} {extra_cxxflags}\n'.format(mode=mode, extra_cxxflags=extra_cxxflags[src], **modeval))
|
||||
for swagger in swaggers:
|
||||
hh = swagger.headers(gen_dir)[0]
|
||||
cc = swagger.sources(gen_dir)[0]
|
||||
@@ -1874,18 +1799,24 @@ with open(buildfile_tmp, 'w') as f:
|
||||
f.write(textwrap.dedent('''\
|
||||
build $builddir/{mode}/iotune: copy $builddir/{mode}/seastar/apps/iotune/iotune
|
||||
''').format(**locals()))
|
||||
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz: package $builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter | always\n'.format(**locals()))
|
||||
f.write('build $builddir/{mode}/dist/tar/scylla-package.tar.gz: package $builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian | always\n'.format(**locals()))
|
||||
f.write(' pool = submodule_pool\n')
|
||||
f.write(' mode = {mode}\n'.format(**locals()))
|
||||
f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz\n')
|
||||
f.write(f'build $builddir/{mode}/scylla-package.tar.gz: copy $builddir/{mode}/dist/tar/scylla-package.tar.gz\n')
|
||||
f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/scylla-package.tar.gz\n')
|
||||
f.write(f' pool = submodule_pool\n')
|
||||
f.write(f' mode = {mode}\n')
|
||||
f.write(f'build $builddir/dist/{mode}/debian: debbuild $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz\n')
|
||||
f.write(f'build $builddir/dist/{mode}/debian: debbuild $builddir/{mode}/scylla-package.tar.gz\n')
|
||||
f.write(f' pool = submodule_pool\n')
|
||||
f.write(f' mode = {mode}\n')
|
||||
f.write(f'build dist-server-{mode}: phony $builddir/dist/{mode}/redhat $builddir/dist/{mode}/debian\n')
|
||||
f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz dist-jmx-rpm dist-jmx-deb\n')
|
||||
f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz dist-tools-rpm dist-tools-deb\n')
|
||||
f.write(f'build dist-python3-{mode}: phony dist-python3-tar dist-python3-rpm dist-python3-deb\n')
|
||||
f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz\n')
|
||||
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz: unified $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz | always\n')
|
||||
f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz dist-jmx-rpm dist-jmx-deb\n')
|
||||
f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz dist-tools-rpm dist-tools-deb\n')
|
||||
f.write(f'build dist-python3-{mode}: phony dist-python3-tar dist-python3-rpm dist-python3-deb compat-python3-rpm compat-python3-deb\n')
|
||||
f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz\n')
|
||||
f.write(f'build $builddir/{mode}/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz: copy $builddir/{mode}/dist/tar/scylla-unified-package.tar.gz\n')
|
||||
f.write(f'build $builddir/{mode}/dist/tar/scylla-unified-package-{scylla_version}.{scylla_release}.tar.gz: unified $builddir/{mode}/dist/tar/scylla-package.tar.gz $builddir/{mode}/dist/tar/scylla-python3-package.tar.gz $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz | always\n')
|
||||
f.write(f' pool = submodule_pool\n')
|
||||
f.write(f' mode = {mode}\n')
|
||||
f.write('rule libdeflate.{mode}\n'.format(**locals()))
|
||||
f.write(' command = make -C libdeflate BUILD_DIR=../$builddir/{mode}/libdeflate/ CFLAGS="{libdeflate_cflags}" CC={args.cc} ../$builddir/{mode}/libdeflate//libdeflate.a\n'.format(**locals()))
|
||||
@@ -1912,12 +1843,12 @@ with open(buildfile_tmp, 'w') as f:
|
||||
)
|
||||
|
||||
f.write(textwrap.dedent(f'''\
|
||||
build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}.{scylla_release}.tar.gz' for mode in build_modes])}
|
||||
build dist-unified-tar: phony {' '.join(['$builddir/{mode}/scylla-unified-package-$scylla_version.$scylla_release.tar.gz'.format(mode=mode) for mode in build_modes])}
|
||||
build dist-unified: phony dist-unified-tar
|
||||
|
||||
build dist-server-deb: phony {' '.join(['$builddir/dist/{mode}/debian'.format(mode=mode) for mode in build_modes])}
|
||||
build dist-server-rpm: phony {' '.join(['$builddir/dist/{mode}/redhat'.format(mode=mode) for mode in build_modes])}
|
||||
build dist-server-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
|
||||
build dist-server-tar: phony {' '.join(['$builddir/{mode}/scylla-package.tar.gz'.format(mode=mode) for mode in build_modes])}
|
||||
build dist-server: phony dist-server-tar dist-server-rpm dist-server-deb
|
||||
|
||||
rule build-submodule-reloc
|
||||
@@ -1927,39 +1858,55 @@ with open(buildfile_tmp, 'w') as f:
|
||||
rule build-submodule-deb
|
||||
command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact
|
||||
|
||||
build tools/jmx/build/{scylla_product}-jmx-package.tar.gz: build-submodule-reloc
|
||||
build tools/jmx/build/scylla-jmx-package.tar.gz: build-submodule-reloc
|
||||
reloc_dir = tools/jmx
|
||||
build dist-jmx-rpm: build-submodule-rpm tools/jmx/build/{scylla_product}-jmx-package.tar.gz
|
||||
build dist-jmx-rpm: build-submodule-rpm tools/jmx/build/scylla-jmx-package.tar.gz
|
||||
dir = tools/jmx
|
||||
artifact = $builddir/{scylla_product}-jmx-package.tar.gz
|
||||
build dist-jmx-deb: build-submodule-deb tools/jmx/build/{scylla_product}-jmx-package.tar.gz
|
||||
artifact = $builddir/scylla-jmx-package.tar.gz
|
||||
build dist-jmx-deb: build-submodule-deb tools/jmx/build/scylla-jmx-package.tar.gz
|
||||
dir = tools/jmx
|
||||
artifact = $builddir/{scylla_product}-jmx-package.tar.gz
|
||||
build dist-jmx-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
|
||||
artifact = $builddir/scylla-jmx-package.tar.gz
|
||||
build dist-jmx-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz'.format(mode=mode) for mode in build_modes])}
|
||||
build dist-jmx: phony dist-jmx-tar dist-jmx-rpm dist-jmx-deb
|
||||
|
||||
build tools/java/build/{scylla_product}-tools-package.tar.gz: build-submodule-reloc
|
||||
build tools/java/build/scylla-tools-package.tar.gz: build-submodule-reloc
|
||||
reloc_dir = tools/java
|
||||
build dist-tools-rpm: build-submodule-rpm tools/java/build/{scylla_product}-tools-package.tar.gz
|
||||
build dist-tools-rpm: build-submodule-rpm tools/java/build/scylla-tools-package.tar.gz
|
||||
dir = tools/java
|
||||
artifact = $builddir/{scylla_product}-tools-package.tar.gz
|
||||
build dist-tools-deb: build-submodule-deb tools/java/build/{scylla_product}-tools-package.tar.gz
|
||||
artifact = $builddir/scylla-tools-package.tar.gz
|
||||
build dist-tools-deb: build-submodule-deb tools/java/build/scylla-tools-package.tar.gz
|
||||
dir = tools/java
|
||||
artifact = $builddir/{scylla_product}-tools-package.tar.gz
|
||||
build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
|
||||
artifact = $builddir/scylla-tools-package.tar.gz
|
||||
build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-tools-package.tar.gz'.format(mode=mode) for mode in build_modes])}
|
||||
build dist-tools: phony dist-tools-tar dist-tools-rpm dist-tools-deb
|
||||
|
||||
build tools/python3/build/{scylla_product}-python3-package.tar.gz: build-submodule-reloc
|
||||
rule compat-python3-reloc
|
||||
command = mkdir -p $builddir/release && ln -f $dir/$artifact $builddir/release/
|
||||
rule compat-python3-rpm
|
||||
command = cd $dir && ./reloc/build_rpm.sh --reloc-pkg $artifact --builddir ../../build/redhat
|
||||
rule compat-python3-deb
|
||||
command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact --builddir ../../build/debian
|
||||
build $builddir/release/scylla-python3-package.tar.gz: compat-python3-reloc tools/python3/build/scylla-python3-package.tar.gz
|
||||
dir = tools/python3
|
||||
artifact = $builddir/scylla-python3-package.tar.gz
|
||||
build compat-python3-rpm: compat-python3-rpm tools/python3/build/scylla-python3-package.tar.gz
|
||||
dir = tools/python3
|
||||
artifact = $builddir/scylla-python3-package.tar.gz
|
||||
build compat-python3-deb: compat-python3-deb tools/python3/build/scylla-python3-package.tar.gz
|
||||
dir = tools/python3
|
||||
artifact = $builddir/scylla-python3-package.tar.gz
|
||||
|
||||
build tools/python3/build/scylla-python3-package.tar.gz: build-submodule-reloc
|
||||
reloc_dir = tools/python3
|
||||
args = --packages "{python3_dependencies}"
|
||||
build dist-python3-rpm: build-submodule-rpm tools/python3/build/{scylla_product}-python3-package.tar.gz
|
||||
build dist-python3-rpm: build-submodule-rpm tools/python3/build/scylla-python3-package.tar.gz
|
||||
dir = tools/python3
|
||||
artifact = $builddir/{scylla_product}-python3-package.tar.gz
|
||||
build dist-python3-deb: build-submodule-deb tools/python3/build/{scylla_product}-python3-package.tar.gz
|
||||
artifact = $builddir/scylla-python3-package.tar.gz
|
||||
build dist-python3-deb: build-submodule-deb tools/python3/build/scylla-python3-package.tar.gz
|
||||
dir = tools/python3
|
||||
artifact = $builddir/{scylla_product}-python3-package.tar.gz
|
||||
build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz'.format(mode=mode, scylla_product=scylla_product) for mode in build_modes])}
|
||||
build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb $builddir/release/{scylla_product}-python3-package.tar.gz
|
||||
artifact = $builddir/scylla-python3-package.tar.gz
|
||||
build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/scylla-python3-package.tar.gz'.format(mode=mode) for mode in build_modes])}
|
||||
build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb $builddir/release/scylla-python3-package.tar.gz compat-python3-rpm compat-python3-deb
|
||||
build dist-deb: phony dist-server-deb dist-python3-deb dist-jmx-deb dist-tools-deb
|
||||
build dist-rpm: phony dist-server-rpm dist-python3-rpm dist-jmx-rpm dist-tools-rpm
|
||||
build dist-tar: phony dist-unified-tar dist-server-tar dist-python3-tar dist-jmx-tar dist-tools-tar
|
||||
@@ -1974,9 +1921,9 @@ with open(buildfile_tmp, 'w') as f:
|
||||
'''))
|
||||
for mode in build_modes:
|
||||
f.write(textwrap.dedent(f'''\
|
||||
build $builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz: copy tools/python3/build/{scylla_product}-python3-package.tar.gz
|
||||
build $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz: copy tools/java/build/{scylla_product}-tools-package.tar.gz
|
||||
build $builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz: copy tools/jmx/build/{scylla_product}-jmx-package.tar.gz
|
||||
build $builddir/{mode}/dist/tar/scylla-python3-package.tar.gz: copy tools/python3/build/scylla-python3-package.tar.gz
|
||||
build $builddir/{mode}/dist/tar/scylla-tools-package.tar.gz: copy tools/java/build/scylla-tools-package.tar.gz
|
||||
build $builddir/{mode}/dist/tar/scylla-jmx-package.tar.gz: copy tools/jmx/build/scylla-jmx-package.tar.gz
|
||||
|
||||
build dist-{mode}: phony dist-server-{mode} dist-python3-{mode} dist-tools-{mode} dist-jmx-{mode} dist-unified-{mode}
|
||||
build dist-check-{mode}: dist-check
|
||||
@@ -2002,13 +1949,6 @@ with open(buildfile_tmp, 'w') as f:
|
||||
build mode_list: mode_list
|
||||
default {modes_list}
|
||||
''').format(modes_list=' '.join(default_modes), **globals()))
|
||||
unit_test_list = set(test for test in build_artifacts if test in set(tests))
|
||||
f.write(textwrap.dedent('''\
|
||||
rule unit_test_list
|
||||
command = /usr/bin/env echo -e '{unit_test_list}'
|
||||
description = List configured unit tests
|
||||
build unit_test_list: unit_test_list
|
||||
''').format(unit_test_list="\\n".join(unit_test_list)))
|
||||
f.write(textwrap.dedent('''\
|
||||
build always: phony
|
||||
rule scylla_version_gen
|
||||
@@ -2017,9 +1957,6 @@ with open(buildfile_tmp, 'w') as f:
|
||||
rule debian_files_gen
|
||||
command = ./dist/debian/debian_files_gen.py
|
||||
build $builddir/debian/debian: debian_files_gen | always
|
||||
rule extract_node_exporter
|
||||
command = tar -C build -xvpf {node_exporter_filename} --no-same-owner && rm -rfv build/node_exporter && mv -v build/{node_exporter_dirname} build/node_exporter
|
||||
build $builddir/node_exporter: extract_node_exporter | always
|
||||
''').format(**globals()))
|
||||
''').format(modes_list=' '.join(build_modes), **globals()))
|
||||
|
||||
os.rename(buildfile_tmp, buildfile)
|
||||
|
||||
@@ -20,47 +20,44 @@
|
||||
*/
|
||||
|
||||
#include "connection_notifier.hh"
|
||||
#include "db/query_context.hh"
|
||||
#include "cql3/constants.hh"
|
||||
#include "database.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
sstring to_string(client_type ct) {
|
||||
namespace db::system_keyspace {
|
||||
extern const char *const CLIENTS;
|
||||
}
|
||||
|
||||
static sstring to_string(client_type ct) {
|
||||
switch (ct) {
|
||||
case client_type::cql: return "cql";
|
||||
case client_type::thrift: return "thrift";
|
||||
case client_type::alternator: return "alternator";
|
||||
default: throw std::runtime_error("Invalid client_type");
|
||||
}
|
||||
throw std::runtime_error("Invalid client_type");
|
||||
}
|
||||
|
||||
static sstring to_string(client_connection_stage ccs) {
|
||||
switch (ccs) {
|
||||
case client_connection_stage::established: return connection_stage_literal<client_connection_stage::established>;
|
||||
case client_connection_stage::authenticating: return connection_stage_literal<client_connection_stage::authenticating>;
|
||||
case client_connection_stage::ready: return connection_stage_literal<client_connection_stage::ready>;
|
||||
}
|
||||
throw std::runtime_error("Invalid client_connection_stage");
|
||||
}
|
||||
|
||||
future<> notify_new_client(client_data cd) {
|
||||
// FIXME: consider prepared statement
|
||||
const static sstring req
|
||||
= format("INSERT INTO system.{} (address, port, client_type, connection_stage, shard_id, protocol_version, username) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?, ?);", db::system_keyspace::CLIENTS);
|
||||
= format("INSERT INTO system.{} (address, port, client_type, shard_id, protocol_version, username) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?);", db::system_keyspace::CLIENTS);
|
||||
|
||||
return db::qctx->execute_cql(req,
|
||||
std::move(cd.ip), cd.port, to_string(cd.ct), to_string(cd.connection_stage), cd.shard_id,
|
||||
return db::execute_cql(req,
|
||||
std::move(cd.ip), cd.port, to_string(cd.ct), cd.shard_id,
|
||||
cd.protocol_version.has_value() ? data_value(*cd.protocol_version) : data_value::make_null(int32_type),
|
||||
cd.username.value_or("anonymous")).discard_result();
|
||||
}
|
||||
|
||||
future<> notify_disconnected_client(net::inet_address addr, int port, client_type ct) {
|
||||
future<> notify_disconnected_client(gms::inet_address addr, client_type ct, int port) {
|
||||
// FIXME: consider prepared statement
|
||||
const static sstring req
|
||||
= format("DELETE FROM system.{} where address=? AND port=? AND client_type=?;",
|
||||
db::system_keyspace::CLIENTS);
|
||||
return db::qctx->execute_cql(req, std::move(addr), port, to_string(ct)).discard_result();
|
||||
return db::execute_cql(req, addr.addr(), port, to_string(ct)).discard_result();
|
||||
}
|
||||
|
||||
future<> clear_clientlist() {
|
||||
|
||||
@@ -20,65 +20,27 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "db/query_context.hh"
|
||||
|
||||
#include <seastar/net/inet_address.hh>
|
||||
#include "gms/inet_address.hh"
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include "seastarx.hh"
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace db::system_keyspace {
|
||||
extern const char *const CLIENTS;
|
||||
}
|
||||
|
||||
enum class client_type {
|
||||
cql = 0,
|
||||
thrift,
|
||||
alternator,
|
||||
};
|
||||
|
||||
sstring to_string(client_type ct);
|
||||
|
||||
enum class changed_column {
|
||||
username = 0,
|
||||
connection_stage,
|
||||
driver_name,
|
||||
driver_version,
|
||||
hostname,
|
||||
protocol_version,
|
||||
};
|
||||
|
||||
template <changed_column column> constexpr const char* column_literal = "";
|
||||
template <> inline constexpr const char* column_literal<changed_column::username> = "username";
|
||||
template <> inline constexpr const char* column_literal<changed_column::connection_stage> = "connection_stage";
|
||||
template <> inline constexpr const char* column_literal<changed_column::driver_name> = "driver_name";
|
||||
template <> inline constexpr const char* column_literal<changed_column::driver_version> = "driver_version";
|
||||
template <> inline constexpr const char* column_literal<changed_column::hostname> = "hostname";
|
||||
template <> inline constexpr const char* column_literal<changed_column::protocol_version> = "protocol_version";
|
||||
|
||||
enum class client_connection_stage {
|
||||
established = 0,
|
||||
authenticating,
|
||||
ready,
|
||||
};
|
||||
|
||||
template <client_connection_stage ccs> constexpr const char* connection_stage_literal = "";
|
||||
template <> inline constexpr const char* connection_stage_literal<client_connection_stage::established> = "ESTABLISHED";
|
||||
template <> inline constexpr const char* connection_stage_literal<client_connection_stage::authenticating> = "AUTHENTICATING";
|
||||
template <> inline constexpr const char* connection_stage_literal<client_connection_stage::ready> = "READY";
|
||||
|
||||
// Representation of a row in `system.clients'. std::optionals are for nullable cells.
|
||||
struct client_data {
|
||||
net::inet_address ip;
|
||||
gms::inet_address ip;
|
||||
int32_t port;
|
||||
client_type ct;
|
||||
client_connection_stage connection_stage = client_connection_stage::established;
|
||||
int32_t shard_id; /// ID of server-side shard which is processing the connection.
|
||||
|
||||
// `optional' column means that it's nullable (possibly because it's
|
||||
// unimplemented yet). If you want to fill ("implement") any of them,
|
||||
// remember to update the query in `notify_new_client()'.
|
||||
std::optional<sstring> connection_stage;
|
||||
std::optional<sstring> driver_name;
|
||||
std::optional<sstring> driver_version;
|
||||
std::optional<sstring> hostname;
|
||||
@@ -90,17 +52,6 @@ struct client_data {
|
||||
};
|
||||
|
||||
future<> notify_new_client(client_data cd);
|
||||
future<> notify_disconnected_client(net::inet_address addr, int port, client_type ct);
|
||||
future<> notify_disconnected_client(gms::inet_address addr, client_type ct, int port);
|
||||
|
||||
future<> clear_clientlist();
|
||||
|
||||
template <changed_column column_enum_val>
|
||||
struct notify_client_change {
|
||||
template <typename T>
|
||||
future<> operator()(net::inet_address addr, int port, client_type ct, T&& value) {
|
||||
const static sstring req
|
||||
= format("UPDATE system.{} SET {}=? WHERE address=? AND port=? AND client_type=?;",
|
||||
db::system_keyspace::CLIENTS, column_literal<column_enum_val>);
|
||||
|
||||
return db::qctx->execute_cql(req, std::forward<T>(value), std::move(addr), port, to_string(ct)).discard_result();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -36,9 +36,9 @@ converting_mutation_partition_applier::upgrade_cell(const abstract_type& new_typ
|
||||
atomic_cell::collection_member cm) {
|
||||
if (cell.is_live() && !old_type.is_counter()) {
|
||||
if (cell.is_live_and_has_ttl()) {
|
||||
return atomic_cell::make_live(new_type, cell.timestamp(), cell.value(), cell.expiry(), cell.ttl(), cm);
|
||||
return atomic_cell::make_live(new_type, cell.timestamp(), cell.value().linearize(), cell.expiry(), cell.ttl(), cm);
|
||||
}
|
||||
return atomic_cell::make_live(new_type, cell.timestamp(), cell.value(), cm);
|
||||
return atomic_cell::make_live(new_type, cell.timestamp(), cell.value().linearize(), cm);
|
||||
} else {
|
||||
return atomic_cell(new_type, cell);
|
||||
}
|
||||
|
||||
39
counters.cc
39
counters.cc
@@ -19,10 +19,16 @@
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "service/storage_service.hh"
|
||||
#include "counters.hh"
|
||||
#include "mutation.hh"
|
||||
#include "combine.hh"
|
||||
|
||||
counter_id counter_id::local()
|
||||
{
|
||||
return counter_id(service::get_local_storage_service().get_local_id());
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const counter_id& id) {
|
||||
return os << id.to_uuid();
|
||||
}
|
||||
@@ -118,14 +124,16 @@ void counter_cell_view::apply(const column_definition& cdef, atomic_cell_or_coll
|
||||
|
||||
assert(!dst_ac.is_counter_update());
|
||||
assert(!src_ac.is_counter_update());
|
||||
with_linearized(dst_ac, [&] (counter_cell_view dst_ccv) {
|
||||
with_linearized(src_ac, [&] (counter_cell_view src_ccv) {
|
||||
|
||||
auto src_ccv = counter_cell_view(src_ac);
|
||||
auto dst_ccv = counter_cell_view(dst_ac);
|
||||
if (dst_ccv.shard_count() >= src_ccv.shard_count()) {
|
||||
auto dst_amc = dst.as_mutable_atomic_cell(cdef);
|
||||
auto src_amc = src.as_mutable_atomic_cell(cdef);
|
||||
if (apply_in_place(cdef, dst_amc, src_amc)) {
|
||||
return;
|
||||
if (!dst_amc.is_value_fragmented() && !src_amc.is_value_fragmented()) {
|
||||
if (apply_in_place(cdef, dst_amc, src_amc)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,6 +148,8 @@ void counter_cell_view::apply(const column_definition& cdef, atomic_cell_or_coll
|
||||
|
||||
auto cell = result.build(std::max(dst_ac.timestamp(), src_ac.timestamp()));
|
||||
src = std::exchange(dst, atomic_cell_or_collection(std::move(cell)));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<atomic_cell> counter_cell_view::difference(atomic_cell_view a, atomic_cell_view b)
|
||||
@@ -154,8 +164,8 @@ std::optional<atomic_cell> counter_cell_view::difference(atomic_cell_view a, ato
|
||||
return { };
|
||||
}
|
||||
|
||||
auto a_ccv = counter_cell_view(a);
|
||||
auto b_ccv = counter_cell_view(b);
|
||||
return with_linearized(a, [&] (counter_cell_view a_ccv) {
|
||||
return with_linearized(b, [&] (counter_cell_view b_ccv) {
|
||||
auto a_shards = a_ccv.shards();
|
||||
auto b_shards = b_ccv.shards();
|
||||
|
||||
@@ -182,13 +192,15 @@ std::optional<atomic_cell> counter_cell_view::difference(atomic_cell_view a, ato
|
||||
diff = atomic_cell::make_live(*counter_type, a.timestamp(), bytes_view());
|
||||
}
|
||||
return diff;
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset, utils::UUID local_id) {
|
||||
void transform_counter_updates_to_shards(mutation& m, const mutation* current_state, uint64_t clock_offset) {
|
||||
// FIXME: allow current_state to be frozen_mutation
|
||||
|
||||
auto transform_new_row_to_shards = [&s = *m.schema(), clock_offset, local_id] (column_kind kind, auto& cells) {
|
||||
auto transform_new_row_to_shards = [&s = *m.schema(), clock_offset] (column_kind kind, auto& cells) {
|
||||
cells.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
|
||||
auto& cdef = s.column_at(kind, id);
|
||||
auto acv = ac_o_c.as_atomic_cell(cdef);
|
||||
@@ -196,7 +208,7 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
||||
return; // continue -- we are in lambda
|
||||
}
|
||||
auto delta = acv.counter_update_value();
|
||||
auto cs = counter_shard(counter_id(local_id), delta, clock_offset + 1);
|
||||
auto cs = counter_shard(counter_id::local(), delta, clock_offset + 1);
|
||||
ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
|
||||
});
|
||||
};
|
||||
@@ -211,7 +223,7 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
||||
|
||||
clustering_key::less_compare cmp(*m.schema());
|
||||
|
||||
auto transform_row_to_shards = [&s = *m.schema(), clock_offset, local_id] (column_kind kind, auto& transformee, auto& state) {
|
||||
auto transform_row_to_shards = [&s = *m.schema(), clock_offset] (column_kind kind, auto& transformee, auto& state) {
|
||||
std::deque<std::pair<column_id, counter_shard>> shards;
|
||||
state.for_each_cell([&] (column_id id, const atomic_cell_or_collection& ac_o_c) {
|
||||
auto& cdef = s.column_at(kind, id);
|
||||
@@ -219,13 +231,14 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
||||
if (!acv.is_live()) {
|
||||
return; // continue -- we are in lambda
|
||||
}
|
||||
auto ccv = counter_cell_view(acv);
|
||||
auto cs = ccv.get_shard(counter_id(local_id));
|
||||
counter_cell_view::with_linearized(acv, [&] (counter_cell_view ccv) {
|
||||
auto cs = ccv.local_shard();
|
||||
if (!cs) {
|
||||
return; // continue
|
||||
}
|
||||
shards.emplace_back(std::make_pair(id, counter_shard(*cs)));
|
||||
});
|
||||
});
|
||||
|
||||
transformee.for_each_cell([&] (column_id id, atomic_cell_or_collection& ac_o_c) {
|
||||
auto& cdef = s.column_at(kind, id);
|
||||
@@ -240,7 +253,7 @@ void transform_counter_updates_to_shards(mutation& m, const mutation* current_st
|
||||
auto delta = acv.counter_update_value();
|
||||
|
||||
if (shards.empty() || shards.front().first > id) {
|
||||
auto cs = counter_shard(counter_id(local_id), delta, clock_offset + 1);
|
||||
auto cs = counter_shard(counter_id::local(), delta, clock_offset + 1);
|
||||
ac_o_c = counter_cell_builder::from_single_shard(acv.timestamp(), cs);
|
||||
} else {
|
||||
auto& cs = shards.front().second;
|
||||
|
||||
143
counters.hh
143
counters.hh
@@ -61,6 +61,8 @@ public:
|
||||
return !(*this == other);
|
||||
}
|
||||
public:
|
||||
static counter_id local();
|
||||
|
||||
// For tests.
|
||||
static counter_id generate_random() {
|
||||
return counter_id(utils::make_random_uuid());
|
||||
@@ -81,20 +83,21 @@ class basic_counter_shard_view {
|
||||
total_size = unsigned(logical_clock) + sizeof(int64_t),
|
||||
};
|
||||
private:
|
||||
managed_bytes_basic_view<is_mutable> _base;
|
||||
using pointer_type = std::conditional_t<is_mutable == mutable_view::no, const signed char*, signed char*>;
|
||||
pointer_type _base;
|
||||
private:
|
||||
template<typename T>
|
||||
T read(offset off) const {
|
||||
auto v = _base;
|
||||
v.remove_prefix(size_t(off));
|
||||
return read_simple_native<T>(v);
|
||||
T value;
|
||||
std::copy_n(_base + static_cast<unsigned>(off), sizeof(T), reinterpret_cast<signed char*>(&value));
|
||||
return value;
|
||||
}
|
||||
public:
|
||||
static constexpr auto size = size_t(offset::total_size);
|
||||
public:
|
||||
basic_counter_shard_view() = default;
|
||||
explicit basic_counter_shard_view(managed_bytes_basic_view<is_mutable> v) noexcept
|
||||
: _base(v) { }
|
||||
explicit basic_counter_shard_view(pointer_type ptr) noexcept
|
||||
: _base(ptr) { }
|
||||
|
||||
counter_id id() const { return read<counter_id>(offset::id); }
|
||||
int64_t value() const { return read<int64_t>(offset::value); }
|
||||
@@ -105,24 +108,15 @@ public:
|
||||
static constexpr size_t size = size_t(offset::total_size) - off;
|
||||
|
||||
signed char tmp[size];
|
||||
auto tmp_view = single_fragmented_mutable_view(bytes_mutable_view(std::data(tmp), std::size(tmp)));
|
||||
|
||||
managed_bytes_mutable_view this_view = _base.substr(off, size);
|
||||
managed_bytes_mutable_view other_view = other._base.substr(off, size);
|
||||
|
||||
copy_fragmented_view(tmp_view, this_view);
|
||||
copy_fragmented_view(this_view, other_view);
|
||||
copy_fragmented_view(other_view, tmp_view);
|
||||
std::copy_n(_base + off, size, tmp);
|
||||
std::copy_n(other._base + off, size, _base + off);
|
||||
std::copy_n(tmp, size, other._base + off);
|
||||
}
|
||||
|
||||
void set_value_and_clock(const basic_counter_shard_view& other) noexcept {
|
||||
static constexpr size_t off = size_t(offset::value);
|
||||
static constexpr size_t size = size_t(offset::total_size) - off;
|
||||
|
||||
managed_bytes_mutable_view this_view = _base.substr(off, size);
|
||||
managed_bytes_mutable_view other_view = other._base.substr(off, size);
|
||||
|
||||
copy_fragmented_view(this_view, other_view);
|
||||
std::copy_n(other._base + off, size, _base + off);
|
||||
}
|
||||
|
||||
bool operator==(const basic_counter_shard_view& other) const {
|
||||
@@ -148,6 +142,11 @@ class counter_shard {
|
||||
counter_id _id;
|
||||
int64_t _value;
|
||||
int64_t _logical_clock;
|
||||
private:
|
||||
template<typename T>
|
||||
static void write(const T& value, bytes::iterator& out) {
|
||||
out = std::copy_n(reinterpret_cast<const signed char*>(&value), sizeof(T), out);
|
||||
}
|
||||
private:
|
||||
// Shared logic for applying counter_shards and counter_shard_views.
|
||||
// T is either counter_shard or basic_counter_shard_view<U>.
|
||||
@@ -198,10 +197,10 @@ public:
|
||||
static constexpr size_t serialized_size() {
|
||||
return counter_shard_view::size;
|
||||
}
|
||||
void serialize(atomic_cell_value_mutable_view& out) const {
|
||||
write_native<counter_id>(out, _id);
|
||||
write_native<int64_t>(out, _value);
|
||||
write_native<int64_t>(out, _logical_clock);
|
||||
void serialize(bytes::iterator& out) const {
|
||||
write(_id, out);
|
||||
write(_value, out);
|
||||
write(_logical_clock, out);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -238,7 +237,7 @@ public:
|
||||
size_t serialized_size() const {
|
||||
return _shards.size() * counter_shard::serialized_size();
|
||||
}
|
||||
void serialize(atomic_cell_value_mutable_view& out) const {
|
||||
void serialize(bytes::iterator& out) const {
|
||||
for (auto&& cs : _shards) {
|
||||
cs.serialize(out);
|
||||
}
|
||||
@@ -249,30 +248,36 @@ public:
|
||||
}
|
||||
|
||||
atomic_cell build(api::timestamp_type timestamp) const {
|
||||
// If we can assume that the counter shards never cross fragment boundaries
|
||||
// the serialisation code gets much simpler.
|
||||
static_assert(data::cell::maximum_external_chunk_length % counter_shard::serialized_size() == 0);
|
||||
|
||||
auto ac = atomic_cell::make_live_uninitialized(*counter_type, timestamp, serialized_size());
|
||||
|
||||
auto dst = ac.value();
|
||||
auto dst_it = ac.value().begin();
|
||||
auto dst_current = *dst_it++;
|
||||
for (auto&& cs : _shards) {
|
||||
cs.serialize(dst);
|
||||
if (dst_current.empty()) {
|
||||
dst_current = *dst_it++;
|
||||
}
|
||||
assert(!dst_current.empty());
|
||||
auto value_dst = dst_current.data();
|
||||
cs.serialize(value_dst);
|
||||
dst_current.remove_prefix(counter_shard::serialized_size());
|
||||
}
|
||||
return ac;
|
||||
}
|
||||
|
||||
static atomic_cell from_single_shard(api::timestamp_type timestamp, const counter_shard& cs) {
|
||||
// We don't really need to bother with fragmentation here.
|
||||
static_assert(data::cell::maximum_external_chunk_length >= counter_shard::serialized_size());
|
||||
auto ac = atomic_cell::make_live_uninitialized(*counter_type, timestamp, counter_shard::serialized_size());
|
||||
auto dst = ac.value();
|
||||
auto dst = ac.value().first_fragment().begin();
|
||||
cs.serialize(dst);
|
||||
return ac;
|
||||
}
|
||||
|
||||
class inserter_iterator {
|
||||
public:
|
||||
using iterator_category = std::output_iterator_tag;
|
||||
using value_type = counter_shard;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = counter_shard*;
|
||||
using reference = counter_shard&;
|
||||
private:
|
||||
class inserter_iterator : public std::iterator<std::output_iterator_tag, counter_shard> {
|
||||
counter_cell_builder* _builder;
|
||||
public:
|
||||
explicit inserter_iterator(counter_cell_builder& b) : _builder(&b) { }
|
||||
@@ -299,22 +304,20 @@ public:
|
||||
template<mutable_view is_mutable>
|
||||
class basic_counter_cell_view {
|
||||
protected:
|
||||
using linearized_value_view = std::conditional_t<is_mutable == mutable_view::no,
|
||||
bytes_view, bytes_mutable_view>;
|
||||
using pointer_type = std::conditional_t<is_mutable == mutable_view::no,
|
||||
bytes_view::const_pointer, bytes_mutable_view::pointer>;
|
||||
basic_atomic_cell_view<is_mutable> _cell;
|
||||
linearized_value_view _value;
|
||||
private:
|
||||
class shard_iterator {
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = basic_counter_shard_view<is_mutable>;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = basic_counter_shard_view<is_mutable>*;
|
||||
using reference = basic_counter_shard_view<is_mutable>&;
|
||||
private:
|
||||
managed_bytes_basic_view<is_mutable> _current;
|
||||
class shard_iterator : public std::iterator<std::input_iterator_tag, basic_counter_shard_view<is_mutable>> {
|
||||
pointer_type _current;
|
||||
basic_counter_shard_view<is_mutable> _current_view;
|
||||
size_t _pos = 0;
|
||||
public:
|
||||
shard_iterator(managed_bytes_basic_view<is_mutable> v, size_t offset) noexcept
|
||||
: _current(v), _current_view(_current), _pos(offset) { }
|
||||
shard_iterator() = default;
|
||||
shard_iterator(pointer_type ptr) noexcept
|
||||
: _current(ptr), _current_view(ptr) { }
|
||||
|
||||
basic_counter_shard_view<is_mutable>& operator*() noexcept {
|
||||
return _current_view;
|
||||
@@ -323,8 +326,8 @@ private:
|
||||
return &_current_view;
|
||||
}
|
||||
shard_iterator& operator++() noexcept {
|
||||
_pos += counter_shard_view::size;
|
||||
_current_view = basic_counter_shard_view<is_mutable>(_current.substr(_pos, counter_shard_view::size));
|
||||
_current += counter_shard_view::size;
|
||||
_current_view = basic_counter_shard_view<is_mutable>(_current);
|
||||
return *this;
|
||||
}
|
||||
shard_iterator operator++(int) noexcept {
|
||||
@@ -333,8 +336,8 @@ private:
|
||||
return it;
|
||||
}
|
||||
shard_iterator& operator--() noexcept {
|
||||
_pos -= counter_shard_view::size;
|
||||
_current_view = basic_counter_shard_view<is_mutable>(_current.substr(_pos, counter_shard_view::size));
|
||||
_current -= counter_shard_view::size;
|
||||
_current_view = basic_counter_shard_view<is_mutable>(_current);
|
||||
return *this;
|
||||
}
|
||||
shard_iterator operator--(int) noexcept {
|
||||
@@ -343,29 +346,31 @@ private:
|
||||
return it;
|
||||
}
|
||||
bool operator==(const shard_iterator& other) const noexcept {
|
||||
return _pos == other._pos;
|
||||
return _current == other._current;
|
||||
}
|
||||
bool operator!=(const shard_iterator& other) const noexcept {
|
||||
return !(*this == other);
|
||||
}
|
||||
};
|
||||
public:
|
||||
boost::iterator_range<shard_iterator> shards() const {
|
||||
auto value = _cell.value();
|
||||
auto begin = shard_iterator(value, 0);
|
||||
auto end = shard_iterator(value, value.size());
|
||||
auto begin = shard_iterator(_value.data());
|
||||
auto end = shard_iterator(_value.data() + _value.size());
|
||||
return boost::make_iterator_range(begin, end);
|
||||
}
|
||||
|
||||
size_t shard_count() const {
|
||||
return _cell.value().size() / counter_shard_view::size;
|
||||
return _cell.value().size_bytes() / counter_shard_view::size;
|
||||
}
|
||||
public:
|
||||
protected:
|
||||
// ac must be a live counter cell
|
||||
explicit basic_counter_cell_view(basic_atomic_cell_view<is_mutable> ac) noexcept
|
||||
: _cell(ac)
|
||||
explicit basic_counter_cell_view(basic_atomic_cell_view<is_mutable> ac, linearized_value_view vv) noexcept
|
||||
: _cell(ac), _value(vv)
|
||||
{
|
||||
assert(_cell.is_live());
|
||||
assert(!_cell.is_counter_update());
|
||||
}
|
||||
|
||||
public:
|
||||
api::timestamp_type timestamp() const { return _cell.timestamp(); }
|
||||
|
||||
static data_type total_value_type() { return long_type; }
|
||||
@@ -386,6 +391,11 @@ public:
|
||||
return *it;
|
||||
}
|
||||
|
||||
std::optional<counter_shard_view> local_shard() const {
|
||||
// TODO: consider caching local shard position
|
||||
return get_shard(counter_id::local());
|
||||
}
|
||||
|
||||
bool operator==(const basic_counter_cell_view& other) const {
|
||||
return timestamp() == other.timestamp() && boost::equal(shards(), other.shards());
|
||||
}
|
||||
@@ -394,6 +404,14 @@ public:
|
||||
struct counter_cell_view : basic_counter_cell_view<mutable_view::no> {
|
||||
using basic_counter_cell_view::basic_counter_cell_view;
|
||||
|
||||
template<typename Function>
|
||||
static decltype(auto) with_linearized(basic_atomic_cell_view<mutable_view::no> ac, Function&& fn) {
|
||||
return ac.value().with_linearized([&] (bytes_view value_view) {
|
||||
counter_cell_view ccv(ac, value_view);
|
||||
return fn(ccv);
|
||||
});
|
||||
}
|
||||
|
||||
// Reversibly applies two counter cells, at least one of them must be live.
|
||||
static void apply(const column_definition& cdef, atomic_cell_or_collection& dst, atomic_cell_or_collection& src);
|
||||
|
||||
@@ -408,8 +426,9 @@ struct counter_cell_mutable_view : basic_counter_cell_view<mutable_view::yes> {
|
||||
using basic_counter_cell_view::basic_counter_cell_view;
|
||||
|
||||
explicit counter_cell_mutable_view(atomic_cell_mutable_view ac) noexcept
|
||||
: basic_counter_cell_view<mutable_view::yes>(ac)
|
||||
: basic_counter_cell_view<mutable_view::yes>(ac, ac.value().first_fragment())
|
||||
{
|
||||
assert(!ac.value().is_fragmented());
|
||||
}
|
||||
|
||||
void set_timestamp(api::timestamp_type ts) { _cell.set_timestamp(ts); }
|
||||
@@ -418,7 +437,7 @@ struct counter_cell_mutable_view : basic_counter_cell_view<mutable_view::yes> {
|
||||
// Transforms mutation dst from counter updates to counter shards using state
|
||||
// stored in current_state.
|
||||
// If current_state is present it has to be in the same schema as dst.
|
||||
void transform_counter_updates_to_shards(mutation& dst, const mutation* current_state, uint64_t clock_offset, utils::UUID local_id);
|
||||
void transform_counter_updates_to_shards(mutation& dst, const mutation* current_state, uint64_t clock_offset);
|
||||
|
||||
template<>
|
||||
struct appending_hash<counter_shard_view> {
|
||||
|
||||
34
cql3/Cql.g
34
cql3/Cql.g
@@ -394,7 +394,6 @@ selectStatement returns [std::unique_ptr<raw::select_statement> expr]
|
||||
bool allow_filtering = false;
|
||||
bool is_json = false;
|
||||
bool bypass_cache = false;
|
||||
auto attrs = std::make_unique<cql3::attributes::raw>();
|
||||
}
|
||||
: K_SELECT (
|
||||
( K_JSON { is_json = true; } )?
|
||||
@@ -409,12 +408,11 @@ selectStatement returns [std::unique_ptr<raw::select_statement> expr]
|
||||
( K_LIMIT rows=intValue { limit = rows; } )?
|
||||
( K_ALLOW K_FILTERING { allow_filtering = true; } )?
|
||||
( K_BYPASS K_CACHE { bypass_cache = true; })?
|
||||
( usingClause[attrs] )?
|
||||
{
|
||||
auto params = make_lw_shared<raw::select_statement::parameters>(std::move(orderings), is_distinct, allow_filtering, is_json, bypass_cache);
|
||||
$expr = std::make_unique<raw::select_statement>(std::move(cf), std::move(params),
|
||||
std::move(sclause), std::move(wclause), std::move(limit), std::move(per_partition_limit),
|
||||
std::move(gbcolumns), std::move(attrs));
|
||||
std::move(gbcolumns));
|
||||
}
|
||||
;
|
||||
|
||||
@@ -523,7 +521,6 @@ usingClause[std::unique_ptr<cql3::attributes::raw>& attrs]
|
||||
usingClauseObjective[std::unique_ptr<cql3::attributes::raw>& attrs]
|
||||
: K_TIMESTAMP ts=intValue { attrs->timestamp = ts; }
|
||||
| K_TTL t=intValue { attrs->time_to_live = t; }
|
||||
| K_TIMEOUT to=term { attrs->timeout = to; }
|
||||
;
|
||||
|
||||
/**
|
||||
@@ -931,7 +928,7 @@ alterKeyspaceStatement returns [std::unique_ptr<cql3::statements::alter_keyspace
|
||||
alterTableStatement returns [std::unique_ptr<alter_table_statement> expr]
|
||||
@init {
|
||||
alter_table_statement::type type;
|
||||
auto props = cql3::statements::cf_prop_defs();
|
||||
auto props = make_shared<cql3::statements::cf_prop_defs>();
|
||||
std::vector<alter_table_statement::column_change> column_changes;
|
||||
std::vector<std::pair<shared_ptr<cql3::column_identifier::raw>, shared_ptr<cql3::column_identifier::raw>>> renames;
|
||||
}
|
||||
@@ -947,7 +944,7 @@ alterTableStatement returns [std::unique_ptr<alter_table_statement> expr]
|
||||
| '(' id1=cident { column_changes.emplace_back(alter_table_statement::column_change{id1}); }
|
||||
(',' idn=cident { column_changes.emplace_back(alter_table_statement::column_change{idn}); } )* ')'
|
||||
)
|
||||
| K_WITH properties[props] { type = alter_table_statement::type::opts; }
|
||||
| K_WITH properties[*props] { type = alter_table_statement::type::opts; }
|
||||
| K_RENAME { type = alter_table_statement::type::rename; }
|
||||
id1=cident K_TO toId1=cident { renames.emplace_back(id1, toId1); }
|
||||
( K_AND idn=cident K_TO toIdn=cident { renames.emplace_back(idn, toIdn); } )*
|
||||
@@ -987,9 +984,9 @@ alterTypeStatement returns [std::unique_ptr<alter_type_statement> expr]
|
||||
*/
|
||||
alterViewStatement returns [std::unique_ptr<alter_view_statement> expr]
|
||||
@init {
|
||||
auto props = cql3::statements::cf_prop_defs();
|
||||
auto props = make_shared<cql3::statements::cf_prop_defs>();
|
||||
}
|
||||
: K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[props]
|
||||
: K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[*props]
|
||||
{
|
||||
$expr = std::make_unique<alter_view_statement>(std::move(cf), std::move(props));
|
||||
}
|
||||
@@ -1124,7 +1121,7 @@ dataResource returns [uninitialized<auth::resource> res]
|
||||
: K_ALL K_KEYSPACES { $res = auth::resource(auth::resource_kind::data); }
|
||||
| K_KEYSPACE ks = keyspaceName { $res = auth::make_data_resource($ks.id); }
|
||||
| ( K_COLUMNFAMILY )? cf = columnFamilyName
|
||||
{ $res = auth::make_data_resource($cf.name.has_keyspace() ? $cf.name.get_keyspace() : "", $cf.name.get_column_family()); }
|
||||
{ $res = auth::make_data_resource($cf.name->get_keyspace(), $cf.name->get_column_family()); }
|
||||
;
|
||||
|
||||
roleResource returns [uninitialized<auth::resource> res]
|
||||
@@ -1261,8 +1258,8 @@ ident returns [shared_ptr<cql3::column_identifier> id]
|
||||
|
||||
// Keyspace & Column family names
|
||||
keyspaceName returns [sstring id]
|
||||
@init { auto name = cql3::cf_name(); }
|
||||
: ksName[name] { $id = name.get_keyspace(); }
|
||||
@init { auto name = make_shared<cql3::cf_name>(); }
|
||||
: ksName[*name] { $id = name->get_keyspace(); }
|
||||
;
|
||||
|
||||
indexName returns [::shared_ptr<cql3::index_name> name]
|
||||
@@ -1270,9 +1267,9 @@ indexName returns [::shared_ptr<cql3::index_name> name]
|
||||
: (ksName[*name] '.')? idxName[*name]
|
||||
;
|
||||
|
||||
columnFamilyName returns [cql3::cf_name name]
|
||||
@init { $name = cql3::cf_name(); }
|
||||
: (ksName[name] '.')? cfName[name]
|
||||
columnFamilyName returns [::shared_ptr<cql3::cf_name> name]
|
||||
@init { $name = ::make_shared<cql3::cf_name>(); }
|
||||
: (ksName[*name] '.')? cfName[*name]
|
||||
;
|
||||
|
||||
userTypeName returns [uninitialized<cql3::ut_name> name]
|
||||
@@ -1549,10 +1546,6 @@ relation[std::vector<cql3::relation_ptr>& clauses]
|
||||
{
|
||||
$clauses.emplace_back(cql3::multi_column_relation::create_non_in_relation(ids, type, literal));
|
||||
}
|
||||
| type=relationType K_SCYLLA_CLUSTERING_BOUND literal=tupleLiteral /* (a, b, c) > (1, 2, 3) or (a, b, c) > (?, ?, ?) */
|
||||
{
|
||||
$clauses.emplace_back(cql3::multi_column_relation::create_scylla_clustering_bound_non_in_relation(ids, type, literal));
|
||||
}
|
||||
| type=relationType tupleMarker=markerForTuple /* (a, b, c) >= ? */
|
||||
{ $clauses.emplace_back(cql3::multi_column_relation::create_non_in_relation(ids, type, tupleMarker)); }
|
||||
)
|
||||
@@ -1768,7 +1761,6 @@ basic_unreserved_keyword returns [sstring str]
|
||||
| K_PER
|
||||
| K_PARTITION
|
||||
| K_GROUP
|
||||
| K_TIMEOUT
|
||||
) { $str = $k.text; }
|
||||
;
|
||||
|
||||
@@ -1919,15 +1911,11 @@ K_PARTITION: P A R T I T I O N;
|
||||
|
||||
K_SCYLLA_TIMEUUID_LIST_INDEX: S C Y L L A '_' T I M E U U I D '_' L I S T '_' I N D E X;
|
||||
K_SCYLLA_COUNTER_SHARD_LIST: S C Y L L A '_' C O U N T E R '_' S H A R D '_' L I S T;
|
||||
K_SCYLLA_CLUSTERING_BOUND: S C Y L L A '_' C L U S T E R I N G '_' B O U N D;
|
||||
|
||||
|
||||
K_GROUP: G R O U P;
|
||||
|
||||
K_LIKE: L I K E;
|
||||
|
||||
K_TIMEOUT: T I M E O U T;
|
||||
|
||||
// Case-insensitive alpha characters
|
||||
fragment A: ('a'|'A');
|
||||
fragment B: ('b'|'B');
|
||||
|
||||
@@ -70,11 +70,11 @@ abstract_marker::raw::raw(int32_t bind_index)
|
||||
::shared_ptr<term> abstract_marker::raw::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const
|
||||
{
|
||||
if (receiver->type->is_collection()) {
|
||||
if (receiver->type->without_reversed().is_list()) {
|
||||
if (receiver->type->get_kind() == abstract_type::kind::list) {
|
||||
return ::make_shared<lists::marker>(_bind_index, receiver);
|
||||
} else if (receiver->type->without_reversed().is_set()) {
|
||||
} else if (receiver->type->get_kind() == abstract_type::kind::set) {
|
||||
return ::make_shared<sets::marker>(_bind_index, receiver);
|
||||
} else if (receiver->type->without_reversed().is_map()) {
|
||||
} else if (receiver->type->get_kind() == abstract_type::kind::map) {
|
||||
return ::make_shared<maps::marker>(_bind_index, receiver);
|
||||
}
|
||||
assert(0);
|
||||
|
||||
@@ -44,13 +44,12 @@
|
||||
namespace cql3 {
|
||||
|
||||
std::unique_ptr<attributes> attributes::none() {
|
||||
return std::unique_ptr<attributes>{new attributes{{}, {}, {}}};
|
||||
return std::unique_ptr<attributes>{new attributes{{}, {}}};
|
||||
}
|
||||
|
||||
attributes::attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live, ::shared_ptr<term>&& timeout)
|
||||
attributes::attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live)
|
||||
: _timestamp{std::move(timestamp)}
|
||||
, _time_to_live{std::move(time_to_live)}
|
||||
, _timeout{std::move(timeout)}
|
||||
{ }
|
||||
|
||||
bool attributes::is_timestamp_set() const {
|
||||
@@ -61,10 +60,6 @@ bool attributes::is_time_to_live_set() const {
|
||||
return bool(_time_to_live);
|
||||
}
|
||||
|
||||
bool attributes::is_timeout_set() const {
|
||||
return bool(_timeout);
|
||||
}
|
||||
|
||||
int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
|
||||
if (!_timestamp) {
|
||||
return now;
|
||||
@@ -77,12 +72,14 @@ int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
|
||||
if (tval.is_unset_value()) {
|
||||
return now;
|
||||
}
|
||||
return with_linearized(*tval, [&] (bytes_view val) {
|
||||
try {
|
||||
data_type_for<int64_t>()->validate(*tval, options.get_cql_serialization_format());
|
||||
data_type_for<int64_t>()->validate(val, options.get_cql_serialization_format());
|
||||
} catch (marshal_exception& e) {
|
||||
throw exceptions::invalid_request_exception("Invalid timestamp value");
|
||||
}
|
||||
return value_cast<int64_t>(data_type_for<int64_t>()->deserialize(*tval));
|
||||
return value_cast<int64_t>(data_type_for<int64_t>()->deserialize(val));
|
||||
});
|
||||
}
|
||||
|
||||
int32_t attributes::get_time_to_live(const query_options& options) {
|
||||
@@ -96,15 +93,16 @@ int32_t attributes::get_time_to_live(const query_options& options) {
|
||||
if (tval.is_unset_value()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto ttl = with_linearized(*tval, [&] (bytes_view val) {
|
||||
try {
|
||||
data_type_for<int32_t>()->validate(*tval, options.get_cql_serialization_format());
|
||||
data_type_for<int32_t>()->validate(val, options.get_cql_serialization_format());
|
||||
}
|
||||
catch (marshal_exception& e) {
|
||||
throw exceptions::invalid_request_exception("Invalid TTL value");
|
||||
}
|
||||
auto ttl = value_cast<int32_t>(data_type_for<int32_t>()->deserialize(*tval));
|
||||
|
||||
return value_cast<int32_t>(data_type_for<int32_t>()->deserialize(val));
|
||||
});
|
||||
if (ttl < 0) {
|
||||
throw exceptions::invalid_request_exception("A TTL must be greater or equal to 0");
|
||||
}
|
||||
@@ -117,25 +115,6 @@ int32_t attributes::get_time_to_live(const query_options& options) {
|
||||
return ttl;
|
||||
}
|
||||
|
||||
|
||||
db::timeout_clock::duration attributes::get_timeout(const query_options& options) const {
|
||||
auto timeout = _timeout->bind_and_get(options);
|
||||
if (timeout.is_null() || timeout.is_unset_value()) {
|
||||
throw exceptions::invalid_request_exception("Timeout value cannot be unset/null");
|
||||
}
|
||||
cql_duration duration = value_cast<cql_duration>(duration_type->deserialize(*timeout));
|
||||
if (duration.months || duration.days) {
|
||||
throw exceptions::invalid_request_exception("Timeout values cannot be expressed in days/months");
|
||||
}
|
||||
if (duration.nanoseconds % 1'000'000 != 0) {
|
||||
throw exceptions::invalid_request_exception("Timeout values cannot have granularity finer than milliseconds");
|
||||
}
|
||||
if (duration.nanoseconds < 0) {
|
||||
throw exceptions::invalid_request_exception("Timeout values must be non-negative");
|
||||
}
|
||||
return std::chrono::duration_cast<db::timeout_clock::duration>(std::chrono::nanoseconds(duration.nanoseconds));
|
||||
}
|
||||
|
||||
void attributes::collect_marker_specification(variable_specifications& bound_names) const {
|
||||
if (_timestamp) {
|
||||
_timestamp->collect_marker_specification(bound_names);
|
||||
@@ -143,16 +122,12 @@ void attributes::collect_marker_specification(variable_specifications& bound_nam
|
||||
if (_time_to_live) {
|
||||
_time_to_live->collect_marker_specification(bound_names);
|
||||
}
|
||||
if (_timeout) {
|
||||
_timeout->collect_marker_specification(bound_names);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<attributes> attributes::raw::prepare(database& db, const sstring& ks_name, const sstring& cf_name) const {
|
||||
auto ts = !timestamp ? ::shared_ptr<term>{} : timestamp->prepare(db, ks_name, timestamp_receiver(ks_name, cf_name));
|
||||
auto ttl = !time_to_live ? ::shared_ptr<term>{} : time_to_live->prepare(db, ks_name, time_to_live_receiver(ks_name, cf_name));
|
||||
auto to = !timeout ? ::shared_ptr<term>{} : timeout->prepare(db, ks_name, timeout_receiver(ks_name, cf_name));
|
||||
return std::unique_ptr<attributes>{new attributes{std::move(ts), std::move(ttl), std::move(to)}};
|
||||
return std::unique_ptr<attributes>{new attributes{std::move(ts), std::move(ttl)}};
|
||||
}
|
||||
|
||||
lw_shared_ptr<column_specification> attributes::raw::timestamp_receiver(const sstring& ks_name, const sstring& cf_name) const {
|
||||
@@ -163,8 +138,4 @@ lw_shared_ptr<column_specification> attributes::raw::time_to_live_receiver(const
|
||||
return make_lw_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[ttl]", true), data_type_for<int32_t>());
|
||||
}
|
||||
|
||||
lw_shared_ptr<column_specification> attributes::raw::timeout_receiver(const sstring& ks_name, const sstring& cf_name) const {
|
||||
return make_lw_shared<column_specification>(ks_name, cf_name, ::make_shared<column_identifier>("[timeout]", true), duration_type);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -54,39 +54,31 @@ class attributes final {
|
||||
private:
|
||||
const ::shared_ptr<term> _timestamp;
|
||||
const ::shared_ptr<term> _time_to_live;
|
||||
const ::shared_ptr<term> _timeout;
|
||||
public:
|
||||
static std::unique_ptr<attributes> none();
|
||||
private:
|
||||
attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live, ::shared_ptr<term>&& timeout);
|
||||
attributes(::shared_ptr<term>&& timestamp, ::shared_ptr<term>&& time_to_live);
|
||||
public:
|
||||
bool is_timestamp_set() const;
|
||||
|
||||
bool is_time_to_live_set() const;
|
||||
|
||||
bool is_timeout_set() const;
|
||||
|
||||
int64_t get_timestamp(int64_t now, const query_options& options);
|
||||
|
||||
int32_t get_time_to_live(const query_options& options);
|
||||
|
||||
db::timeout_clock::duration get_timeout(const query_options& options) const;
|
||||
|
||||
void collect_marker_specification(variable_specifications& bound_names) const;
|
||||
|
||||
class raw final {
|
||||
public:
|
||||
::shared_ptr<term::raw> timestamp;
|
||||
::shared_ptr<term::raw> time_to_live;
|
||||
::shared_ptr<term::raw> timeout;
|
||||
|
||||
std::unique_ptr<attributes> prepare(database& db, const sstring& ks_name, const sstring& cf_name) const;
|
||||
private:
|
||||
lw_shared_ptr<column_specification> timestamp_receiver(const sstring& ks_name, const sstring& cf_name) const;
|
||||
|
||||
lw_shared_ptr<column_specification> time_to_live_receiver(const sstring& ks_name, const sstring& cf_name) const;
|
||||
|
||||
lw_shared_ptr<column_specification> timeout_receiver(const sstring& ks_name, const sstring& cf_name) const;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -35,28 +35,6 @@ struct authorized_prepared_statements_cache_size {
|
||||
class authorized_prepared_statements_cache_key {
|
||||
public:
|
||||
using cache_key_type = std::pair<auth::authenticated_user, typename cql3::prepared_cache_key_type::cache_key_type>;
|
||||
|
||||
struct view {
|
||||
const auth::authenticated_user& user_ref;
|
||||
const cql3::prepared_cache_key_type& prep_cache_key_ref;
|
||||
};
|
||||
|
||||
struct view_hasher {
|
||||
size_t operator()(const view& kv) {
|
||||
return cql3::authorized_prepared_statements_cache_key::hash(kv.user_ref, kv.prep_cache_key_ref.key());
|
||||
}
|
||||
};
|
||||
|
||||
struct view_equal {
|
||||
bool operator()(const authorized_prepared_statements_cache_key& k1, const view& k2) {
|
||||
return k1.key().first == k2.user_ref && k1.key().second == k2.prep_cache_key_ref.key();
|
||||
}
|
||||
|
||||
bool operator()(const view& k2, const authorized_prepared_statements_cache_key& k1) {
|
||||
return operator()(k1, k2);
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
cache_key_type _key;
|
||||
|
||||
@@ -122,12 +100,10 @@ private:
|
||||
|
||||
public:
|
||||
using key_type = cache_key_type;
|
||||
using key_view_type = typename key_type::view;
|
||||
using key_view_hasher = typename key_type::view_hasher;
|
||||
using key_view_equal = typename key_type::view_equal;
|
||||
using value_type = checked_weak_ptr;
|
||||
using entry_is_too_big = typename cache_type::entry_is_too_big;
|
||||
using value_ptr = typename cache_type::value_ptr;
|
||||
using iterator = typename cache_type::iterator;
|
||||
|
||||
private:
|
||||
cache_type _cache;
|
||||
logging::logger& _logger;
|
||||
@@ -148,12 +124,38 @@ public:
|
||||
}).discard_result();
|
||||
}
|
||||
|
||||
value_ptr find(const auth::authenticated_user& user, const cql3::prepared_cache_key_type& prep_cache_key) {
|
||||
return _cache.find(key_view_type{user, prep_cache_key}, key_view_hasher(), key_view_equal());
|
||||
iterator find(const auth::authenticated_user& user, const cql3::prepared_cache_key_type& prep_cache_key) {
|
||||
struct key_view {
|
||||
const auth::authenticated_user& user_ref;
|
||||
const cql3::prepared_cache_key_type& prep_cache_key_ref;
|
||||
};
|
||||
|
||||
struct hasher {
|
||||
size_t operator()(const key_view& kv) {
|
||||
return cql3::authorized_prepared_statements_cache_key::hash(kv.user_ref, kv.prep_cache_key_ref.key());
|
||||
}
|
||||
};
|
||||
|
||||
struct equal {
|
||||
bool operator()(const key_type& k1, const key_view& k2) {
|
||||
return k1.key().first == k2.user_ref && k1.key().second == k2.prep_cache_key_ref.key();
|
||||
}
|
||||
|
||||
bool operator()(const key_view& k2, const key_type& k1) {
|
||||
return operator()(k1, k2);
|
||||
}
|
||||
};
|
||||
|
||||
return _cache.find(key_view{user, prep_cache_key}, hasher(), equal());
|
||||
}
|
||||
|
||||
iterator end() {
|
||||
return _cache.end();
|
||||
}
|
||||
|
||||
void remove(const auth::authenticated_user& user, const cql3::prepared_cache_key_type& prep_cache_key) {
|
||||
_cache.remove(key_view_type{user, prep_cache_key}, key_view_hasher(), key_view_equal());
|
||||
iterator it = find(user, prep_cache_key);
|
||||
_cache.remove(it);
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
|
||||
@@ -230,7 +230,9 @@ public:
|
||||
} else if (value.is_unset_value()) {
|
||||
return;
|
||||
}
|
||||
auto increment = value_cast<int64_t>(long_type->deserialize_value(*value));
|
||||
auto increment = with_linearized(*value, [] (bytes_view value_view) {
|
||||
return value_cast<int64_t>(long_type->deserialize_value(value_view));
|
||||
});
|
||||
m.set_cell(prefix, column, make_counter_update_cell(increment, params));
|
||||
}
|
||||
};
|
||||
@@ -245,7 +247,9 @@ public:
|
||||
} else if (value.is_unset_value()) {
|
||||
return;
|
||||
}
|
||||
auto increment = value_cast<int64_t>(long_type->deserialize_value(*value));
|
||||
auto increment = with_linearized(*value, [] (bytes_view value_view) {
|
||||
return value_cast<int64_t>(long_type->deserialize_value(value_view));
|
||||
});
|
||||
if (increment == std::numeric_limits<int64_t>::min()) {
|
||||
throw exceptions::invalid_request_exception(format("The negation of {:d} overflows supported counter precision (signed 8 bytes integer)", increment));
|
||||
}
|
||||
|
||||
@@ -59,8 +59,6 @@ class result_message;
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
class query_processor;
|
||||
|
||||
class metadata;
|
||||
shared_ptr<const metadata> make_empty_metadata();
|
||||
|
||||
@@ -101,9 +99,11 @@ public:
|
||||
* @param options options for this query (consistency, variables, pageSize, ...)
|
||||
*/
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
execute(query_processor& qp, service::query_state& state, const query_options& options) const = 0;
|
||||
execute(service::storage_proxy& proxy, service::query_state& state, const query_options& options) const = 0;
|
||||
|
||||
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const = 0;
|
||||
virtual bool depends_on_keyspace(const sstring& ks_name) const = 0;
|
||||
|
||||
virtual bool depends_on_column_family(const sstring& cf_name) const = 0;
|
||||
|
||||
virtual shared_ptr<const metadata> get_result_metadata() const = 0;
|
||||
|
||||
|
||||
@@ -45,8 +45,7 @@ using boost::adaptors::transformed;
|
||||
|
||||
namespace {
|
||||
|
||||
static
|
||||
bytes_opt do_get_value(const schema& schema,
|
||||
std::optional<atomic_cell_value_view> do_get_value(const schema& schema,
|
||||
const column_definition& cdef,
|
||||
const partition_key& key,
|
||||
const clustering_key_prefix& ckey,
|
||||
@@ -54,9 +53,9 @@ bytes_opt do_get_value(const schema& schema,
|
||||
gc_clock::time_point now) {
|
||||
switch (cdef.kind) {
|
||||
case column_kind::partition_key:
|
||||
return to_bytes(key.get_component(schema, cdef.component_index()));
|
||||
return atomic_cell_value_view(key.get_component(schema, cdef.component_index()));
|
||||
case column_kind::clustering_key:
|
||||
return to_bytes(ckey.get_component(schema, cdef.component_index()));
|
||||
return atomic_cell_value_view(ckey.get_component(schema, cdef.component_index()));
|
||||
default:
|
||||
auto cell = cells.find_cell(cdef.id);
|
||||
if (!cell) {
|
||||
@@ -64,7 +63,7 @@ bytes_opt do_get_value(const schema& schema,
|
||||
}
|
||||
assert(cdef.is_atomic());
|
||||
auto c = cell->as_atomic_cell(cdef);
|
||||
return c.is_dead(now) ? std::nullopt : bytes_opt(to_bytes(c.value()));
|
||||
return c.is_dead(now) ? std::nullopt : std::optional<atomic_cell_value_view>(c.value());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,8 +140,9 @@ bytes_opt get_value_from_partition_slice(
|
||||
|
||||
/// Returns col's value from a mutation.
|
||||
bytes_opt get_value_from_mutation(const column_value& col, row_data_from_mutation data) {
|
||||
return do_get_value(
|
||||
const auto v = do_get_value(
|
||||
data.schema_, *col.col, data.partition_key_, data.clustering_key_, data.other_columns, data.now);
|
||||
return v ? v->linearize() : bytes_opt();
|
||||
}
|
||||
|
||||
/// Returns col's value from the fetched data.
|
||||
@@ -156,7 +156,7 @@ bytes_opt get_value(const column_value& col, const column_value_eval_bag& bag) {
|
||||
|
||||
/// Type for comparing results of get_value().
|
||||
const abstract_type* get_value_comparator(const column_definition* cdef) {
|
||||
return &cdef->type->without_reversed();
|
||||
return cdef->type->is_reversed() ? cdef->type->underlying_type().get() : cdef->type.get();
|
||||
}
|
||||
|
||||
/// Type for comparing results of get_value().
|
||||
@@ -357,12 +357,16 @@ bytes_opt next_value(query::result_row_view::iterator_type& iter, const column_d
|
||||
if (cdef->type->is_multi_cell()) {
|
||||
auto cell = iter.next_collection_cell();
|
||||
if (cell) {
|
||||
return linearized(*cell);
|
||||
return cell->with_linearized([] (bytes_view data) {
|
||||
return bytes(data.cbegin(), data.cend());
|
||||
});
|
||||
}
|
||||
} else {
|
||||
auto cell = iter.next_atomic_cell();
|
||||
if (cell) {
|
||||
return linearized(cell->value());
|
||||
return cell->value().with_linearized([] (bytes_view data) {
|
||||
return bytes(data.cbegin(), data.cend());
|
||||
});
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
@@ -583,7 +587,7 @@ value_list get_IN_values(
|
||||
if (val == constants::UNSET_VALUE) {
|
||||
throw exceptions::invalid_request_exception(format("Invalid unset value for column {}", column_name));
|
||||
}
|
||||
statements::request_validations::check_not_null(val, "Invalid null value for column %s", column_name);
|
||||
statements::request_validations::check_not_null(val, "Invalid null value for IN tuple");
|
||||
return to_sorted_vector(static_pointer_cast<lists::value>(val)->get_elements() | non_null | deref, comparator);
|
||||
}
|
||||
throw std::logic_error(format("get_IN_values(single column) on invalid term {}", *t));
|
||||
@@ -611,6 +615,22 @@ value_list get_IN_values(const ::shared_ptr<term>& t, size_t k, const query_opti
|
||||
|
||||
static constexpr bool inclusive = true, exclusive = false;
|
||||
|
||||
/// A range of all X such that X op val.
|
||||
nonwrapping_range<bytes> to_range(oper_t op, const bytes& val) {
|
||||
switch (op) {
|
||||
case oper_t::GT:
|
||||
return nonwrapping_range<bytes>::make_starting_with(range_bound(val, exclusive));
|
||||
case oper_t::GTE:
|
||||
return nonwrapping_range<bytes>::make_starting_with(range_bound(val, inclusive));
|
||||
case oper_t::LT:
|
||||
return nonwrapping_range<bytes>::make_ending_with(range_bound(val, exclusive));
|
||||
case oper_t::LTE:
|
||||
return nonwrapping_range<bytes>::make_ending_with(range_bound(val, inclusive));
|
||||
default:
|
||||
throw std::logic_error(format("to_range: unknown comparison operator {}", op));
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
expression make_conjunction(expression a, expression b) {
|
||||
@@ -639,7 +659,7 @@ bool is_satisfied_by(
|
||||
std::vector<bytes_opt> first_multicolumn_bound(
|
||||
const expression& restr, const query_options& options, statements::bound bnd) {
|
||||
auto found = find_atom(restr, [bnd] (const binary_operator& oper) {
|
||||
return matches(oper.op, bnd) && is_multi_column(oper);
|
||||
return matches(oper.op, bnd) && std::holds_alternative<std::vector<column_value>>(oper.lhs);
|
||||
});
|
||||
if (found) {
|
||||
return static_pointer_cast<tuples::value>(found->rhs->bind(options))->get_elements();
|
||||
@@ -648,27 +668,6 @@ std::vector<bytes_opt> first_multicolumn_bound(
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
nonwrapping_range<T> to_range(oper_t op, const T& val) {
|
||||
static constexpr bool inclusive = true, exclusive = false;
|
||||
switch (op) {
|
||||
case oper_t::EQ:
|
||||
return nonwrapping_range<T>::make_singular(val);
|
||||
case oper_t::GT:
|
||||
return nonwrapping_range<T>::make_starting_with(interval_bound(val, exclusive));
|
||||
case oper_t::GTE:
|
||||
return nonwrapping_range<T>::make_starting_with(interval_bound(val, inclusive));
|
||||
case oper_t::LT:
|
||||
return nonwrapping_range<T>::make_ending_with(interval_bound(val, exclusive));
|
||||
case oper_t::LTE:
|
||||
return nonwrapping_range<T>::make_ending_with(interval_bound(val, inclusive));
|
||||
default:
|
||||
throw std::logic_error(format("to_range: unknown comparison operator {}", op));
|
||||
}
|
||||
}
|
||||
|
||||
template nonwrapping_range<clustering_key_prefix> to_range(oper_t, const clustering_key_prefix&);
|
||||
|
||||
value_set possible_lhs_values(const column_definition* cdef, const expression& expr, const query_options& options) {
|
||||
const auto type = cdef ? get_value_comparator(cdef) : long_type.get();
|
||||
return std::visit(overloaded_functor{
|
||||
@@ -741,9 +740,9 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
|
||||
if (oper.op == oper_t::EQ) {
|
||||
return value_list{*val};
|
||||
} else if (oper.op == oper_t::GT) {
|
||||
return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, exclusive));
|
||||
return nonwrapping_range<bytes>::make_starting_with(range_bound(*val, exclusive));
|
||||
} else if (oper.op == oper_t::GTE) {
|
||||
return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, inclusive));
|
||||
return nonwrapping_range<bytes>::make_starting_with(range_bound(*val, inclusive));
|
||||
}
|
||||
static const bytes MININT = serialized(std::numeric_limits<int64_t>::min()),
|
||||
MAXINT = serialized(std::numeric_limits<int64_t>::max());
|
||||
@@ -751,9 +750,9 @@ value_set possible_lhs_values(const column_definition* cdef, const expression& e
|
||||
// that as MAXINT for some reason.
|
||||
const auto adjusted_val = (*val == MININT) ? serialized(MAXINT) : *val;
|
||||
if (oper.op == oper_t::LT) {
|
||||
return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, exclusive));
|
||||
return nonwrapping_range<bytes>::make_ending_with(range_bound(adjusted_val, exclusive));
|
||||
} else if (oper.op == oper_t::LTE) {
|
||||
return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, inclusive));
|
||||
return nonwrapping_range<bytes>::make_ending_with(range_bound(adjusted_val, inclusive));
|
||||
}
|
||||
throw std::logic_error(format("get_token_interval invalid operator {}", oper.op));
|
||||
},
|
||||
@@ -786,11 +785,9 @@ bool is_supported_by(const expression& expr, const secondary_index::index& idx)
|
||||
return idx.supports_expression(*col.col, oper.op);
|
||||
},
|
||||
[&] (const std::vector<column_value>& cvs) {
|
||||
if (cvs.size() == 1) {
|
||||
return idx.supports_expression(*cvs[0].col, oper.op);
|
||||
}
|
||||
// We don't use index table for multi-column restrictions, as it cannot avoid filtering.
|
||||
return false;
|
||||
return boost::algorithm::any_of(cvs, [&] (const column_value& c) {
|
||||
return idx.supports_expression(*c.col, oper.op);
|
||||
});
|
||||
},
|
||||
[&] (const token&) { return false; },
|
||||
}, oper.lhs);
|
||||
@@ -812,7 +809,7 @@ bool has_supporting_index(
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const column_value& cv) {
|
||||
os << cv.col->name_as_text();
|
||||
os << *cv.col;
|
||||
if (cv.sub) {
|
||||
os << '[' << *cv.sub << ']';
|
||||
}
|
||||
@@ -827,10 +824,10 @@ std::ostream& operator<<(std::ostream& os, const expression& expr) {
|
||||
std::visit(overloaded_functor{
|
||||
[&] (const token& t) { os << "TOKEN"; },
|
||||
[&] (const column_value& col) {
|
||||
fmt::print(os, "{}", col);
|
||||
fmt::print(os, "({})", col);
|
||||
},
|
||||
[&] (const std::vector<column_value>& cvs) {
|
||||
fmt::print(os, "({})", fmt::join(cvs, ","));
|
||||
fmt::print(os, "(({}))", fmt::join(cvs, ","));
|
||||
},
|
||||
}, opr.lhs);
|
||||
os << ' ' << opr.op << ' ' << *opr.rhs;
|
||||
|
||||
@@ -73,18 +73,11 @@ struct token {};
|
||||
|
||||
enum class oper_t { EQ, NEQ, LT, LTE, GTE, GT, IN, CONTAINS, CONTAINS_KEY, IS_NOT, LIKE };
|
||||
|
||||
/// Describes the nature of clustering-key comparisons. Useful for implementing SCYLLA_CLUSTERING_BOUND.
|
||||
enum class comparison_order : char {
|
||||
cql, ///< CQL order. (a,b)>(1,1) is equivalent to a>1 OR (a=1 AND b>1).
|
||||
clustering, ///< Table's clustering order. (a,b)>(1,1) means any row past (1,1) in storage.
|
||||
};
|
||||
|
||||
/// Operator restriction: LHS op RHS.
|
||||
struct binary_operator {
|
||||
std::variant<column_value, std::vector<column_value>, token> lhs;
|
||||
oper_t op;
|
||||
::shared_ptr<term> rhs;
|
||||
comparison_order order = comparison_order::cql;
|
||||
};
|
||||
|
||||
/// A conjunction of restrictions.
|
||||
@@ -138,10 +131,6 @@ extern value_set possible_lhs_values(const column_definition*, const expression&
|
||||
/// Turns value_set into a range, unless it's a multi-valued list (in which case this throws).
|
||||
extern nonwrapping_range<bytes> to_range(const value_set&);
|
||||
|
||||
/// A range of all X such that X op val.
|
||||
template<typename T>
|
||||
nonwrapping_range<T> to_range(oper_t op, const T& val);
|
||||
|
||||
/// True iff the index can support the entire expression.
|
||||
extern bool is_supported_by(const expression&, const secondary_index::index&);
|
||||
|
||||
@@ -193,8 +182,7 @@ inline const binary_operator* find(const expression& e, oper_t op) {
|
||||
}
|
||||
|
||||
inline bool needs_filtering(oper_t op) {
|
||||
return (op == oper_t::CONTAINS) || (op == oper_t::CONTAINS_KEY) || (op == oper_t::LIKE) ||
|
||||
(op == oper_t::IS_NOT) || (op == oper_t::NEQ) ;
|
||||
return (op == oper_t::CONTAINS) || (op == oper_t::CONTAINS_KEY) || (op == oper_t::LIKE);
|
||||
}
|
||||
|
||||
inline auto find_needs_filtering(const expression& e) {
|
||||
@@ -223,10 +211,6 @@ inline bool is_compare(oper_t op) {
|
||||
}
|
||||
}
|
||||
|
||||
inline bool is_multi_column(const binary_operator& op) {
|
||||
return holds_alternative<std::vector<column_value>>(op.lhs);
|
||||
}
|
||||
|
||||
inline bool has_token(const expression& e) {
|
||||
return find_atom(e, [] (const binary_operator& o) { return std::holds_alternative<token>(o.lhs); });
|
||||
}
|
||||
@@ -235,14 +219,6 @@ inline bool has_slice_or_needs_filtering(const expression& e) {
|
||||
return find_atom(e, [] (const binary_operator& o) { return is_slice(o.op) || needs_filtering(o.op); });
|
||||
}
|
||||
|
||||
inline bool is_clustering_order(const binary_operator& op) {
|
||||
return op.order == comparison_order::clustering;
|
||||
}
|
||||
|
||||
inline auto find_clustering_order(const expression& e) {
|
||||
return find_atom(e, is_clustering_order);
|
||||
}
|
||||
|
||||
/// True iff binary_operator involves a collection.
|
||||
extern bool is_on_collection(const binary_operator&);
|
||||
|
||||
|
||||
@@ -219,7 +219,7 @@ struct aggregate_type_for<simple_date_native_type> {
|
||||
|
||||
template<>
|
||||
struct aggregate_type_for<timeuuid_native_type> {
|
||||
using type = timeuuid_native_type;
|
||||
using type = timeuuid_native_type::primary_type;
|
||||
};
|
||||
|
||||
template<>
|
||||
@@ -227,7 +227,6 @@ struct aggregate_type_for<time_native_type> {
|
||||
using type = time_native_type::primary_type;
|
||||
};
|
||||
|
||||
// WARNING: never invoke this on temporary values; it will return a dangling reference.
|
||||
template <typename Type>
|
||||
const Type& max_wrapper(const Type& t1, const Type& t2) {
|
||||
using std::max;
|
||||
@@ -242,10 +241,6 @@ inline const net::inet_address& max_wrapper(const net::inet_address& t1, const n
|
||||
return std::memcmp(t1.data(), t2.data(), len) >= 0 ? t1 : t2;
|
||||
}
|
||||
|
||||
inline const timeuuid_native_type& max_wrapper(const timeuuid_native_type& t1, const timeuuid_native_type& t2) {
|
||||
return t1.uuid.timestamp() > t2.uuid.timestamp() ? t1 : t2;
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
class impl_max_function_for final : public aggregate_function::aggregate {
|
||||
std::optional<typename aggregate_type_for<Type>::type> _max{};
|
||||
@@ -328,7 +323,6 @@ make_max_function() {
|
||||
return make_shared<max_function_for<Type>>();
|
||||
}
|
||||
|
||||
// WARNING: never invoke this on temporary values; it will return a dangling reference.
|
||||
template <typename Type>
|
||||
const Type& min_wrapper(const Type& t1, const Type& t2) {
|
||||
using std::min;
|
||||
@@ -343,10 +337,6 @@ inline const net::inet_address& min_wrapper(const net::inet_address& t1, const n
|
||||
return std::memcmp(t1.data(), t2.data(), len) <= 0 ? t1 : t2;
|
||||
}
|
||||
|
||||
inline timeuuid_native_type min_wrapper(timeuuid_native_type t1, timeuuid_native_type t2) {
|
||||
return t1.uuid.timestamp() < t2.uuid.timestamp() ? t1 : t2;
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
class impl_min_function_for final : public aggregate_function::aggregate {
|
||||
std::optional<typename aggregate_type_for<Type>::type> _min{};
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include "error_injection_fcts.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "types/list.hh"
|
||||
#include <seastar/core/map_reduce.hh>
|
||||
|
||||
namespace cql3
|
||||
{
|
||||
|
||||
@@ -54,7 +54,7 @@ std::ostream& operator<<(std::ostream& os, const std::vector<data_type>& arg_typ
|
||||
namespace cql3 {
|
||||
namespace functions {
|
||||
|
||||
logging::logger log("cql3_fuctions");
|
||||
static logging::logger log("cql3_fuctions");
|
||||
|
||||
bool abstract_function::requires_thread() const { return false; }
|
||||
|
||||
@@ -76,7 +76,7 @@ functions::init() noexcept {
|
||||
// that has less information in it. Given how unlikely it is that
|
||||
// we will run out of memory this early, having a better core dump
|
||||
// if we do seems like a good trade-off.
|
||||
memory::scoped_critical_alloc_section dfg;
|
||||
memory::disable_failure_guard dfg;
|
||||
|
||||
std::unordered_multimap<function_name, shared_ptr<function>> ret;
|
||||
auto declare = [&ret] (shared_ptr<function> f) { ret.emplace(f->name(), f); };
|
||||
@@ -181,18 +181,13 @@ inline
|
||||
shared_ptr<function>
|
||||
make_from_json_function(database& db, const sstring& keyspace, data_type t) {
|
||||
return make_native_scalar_function<true>("fromjson", t, {utf8_type},
|
||||
[&db, keyspace, t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
|
||||
try {
|
||||
rjson::value json_value = rjson::parse(utf8_type->to_string(parameters[0].value()));
|
||||
bytes_opt parsed_json_value;
|
||||
if (!json_value.IsNull()) {
|
||||
parsed_json_value.emplace(from_json_object(*t, json_value, sf));
|
||||
}
|
||||
return parsed_json_value;
|
||||
} catch(rjson::error& e) {
|
||||
throw exceptions::function_execution_exception("fromJson",
|
||||
format("Failed parsing fromJson parameter: {}", e.what()), keyspace, {t->name()});
|
||||
[&db, &keyspace, t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
|
||||
rjson::value json_value = rjson::parse(utf8_type->to_string(parameters[0].value()));
|
||||
bytes_opt parsed_json_value;
|
||||
if (!json_value.IsNull()) {
|
||||
parsed_json_value.emplace(from_json_object(*t, json_value, sf));
|
||||
}
|
||||
return parsed_json_value;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -78,22 +78,7 @@ public:
|
||||
return Pure;
|
||||
}
|
||||
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
|
||||
try {
|
||||
return _func(sf, parameters);
|
||||
} catch(exceptions::cassandra_exception&) {
|
||||
// If the function's code took the time to produce an official
|
||||
// cassandra_exception, pass it through. Otherwise, below we will
|
||||
// wrap the unknown exception in a function_execution_exception.
|
||||
throw;
|
||||
} catch(...) {
|
||||
std::vector<sstring> args;
|
||||
args.reserve(arg_types().size());
|
||||
for (const data_type& a : arg_types()) {
|
||||
args.push_back(a->name());
|
||||
}
|
||||
throw exceptions::function_execution_exception(name().name,
|
||||
format("Failed execution of function {}: {}", name(), std::current_exception()), name().keyspace, std::move(args));
|
||||
}
|
||||
return _func(sf, parameters);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -21,13 +21,9 @@
|
||||
|
||||
#include "user_function.hh"
|
||||
#include "lua.hh"
|
||||
#include "log.hh"
|
||||
|
||||
namespace cql3 {
|
||||
namespace functions {
|
||||
|
||||
extern logging::logger log;
|
||||
|
||||
user_function::user_function(function_name name, std::vector<data_type> arg_types, std::vector<sstring> arg_names,
|
||||
sstring body, sstring language, data_type return_type, bool called_on_null_input, sstring bitcode,
|
||||
lua::runtime_config cfg)
|
||||
@@ -60,9 +56,7 @@ bytes_opt user_function::execute(cql_serialization_format sf, const std::vector<
|
||||
}
|
||||
values.push_back(bytes ? type->deserialize(*bytes) : data_value::make_null(type));
|
||||
}
|
||||
if (!seastar::thread::running_in_thread()) {
|
||||
on_internal_error(log, "User function cannot be executed in this context");
|
||||
}
|
||||
|
||||
return lua::run_script(lua::bitcode_view{_bitcode}, values, return_type(), _cfg).get0();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,11 +53,11 @@ const sstring& index_name::get_idx() const
|
||||
return _idx_name;
|
||||
}
|
||||
|
||||
cf_name index_name::get_cf_name() const
|
||||
::shared_ptr<cf_name> index_name::get_cf_name() const
|
||||
{
|
||||
cf_name cf;
|
||||
auto cf = ::make_shared<cf_name>();
|
||||
if (has_keyspace()) {
|
||||
cf.set_keyspace(get_keyspace(), true);
|
||||
cf->set_keyspace(get_keyspace(), true);
|
||||
}
|
||||
return cf;
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ public:
|
||||
|
||||
const sstring& get_idx() const;
|
||||
|
||||
cf_name get_cf_name() const;
|
||||
::shared_ptr<cf_name> get_cf_name() const;
|
||||
|
||||
virtual sstring to_string() const override;
|
||||
};
|
||||
|
||||
@@ -55,7 +55,6 @@ bool keyspace_element_name::has_keyspace() const
|
||||
|
||||
const sstring& keyspace_element_name::get_keyspace() const
|
||||
{
|
||||
assert(_ks_name);
|
||||
return *_ks_name;
|
||||
}
|
||||
|
||||
|
||||
@@ -25,8 +25,8 @@
|
||||
#include "cql3_type.hh"
|
||||
#include "constants.hh"
|
||||
#include <boost/iterator/transform_iterator.hpp>
|
||||
#include <boost/range/adaptor/reversed.hpp>
|
||||
#include "types/list.hh"
|
||||
#include "utils/UUID_gen.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
@@ -40,7 +40,7 @@ lw_shared_ptr<column_specification>
|
||||
lists::value_spec_of(const column_specification& column) {
|
||||
return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
|
||||
::make_shared<column_identifier>(format("value({})", *column.name), true),
|
||||
dynamic_cast<const list_type_impl&>(column.type->without_reversed()).get_elements_type());
|
||||
dynamic_pointer_cast<const list_type_impl>(column.type)->get_elements_type());
|
||||
}
|
||||
|
||||
lw_shared_ptr<column_specification>
|
||||
@@ -87,7 +87,7 @@ lists::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<col
|
||||
|
||||
void
|
||||
lists::literal::validate_assignable_to(database& db, const sstring keyspace, const column_specification& receiver) const {
|
||||
if (!receiver.type->without_reversed().is_list()) {
|
||||
if (!dynamic_pointer_cast<const list_type_impl>(receiver.type)) {
|
||||
throw exceptions::invalid_request_exception(format("Invalid list literal for {} of type {}",
|
||||
*receiver.name, receiver.type->as_cql3_type()));
|
||||
}
|
||||
@@ -125,11 +125,18 @@ lists::literal::to_string() const {
|
||||
|
||||
lists::value
|
||||
lists::value::from_serialized(const fragmented_temporary_buffer::view& val, const list_type_impl& type, cql_serialization_format sf) {
|
||||
return with_linearized(val, [&] (bytes_view v) {
|
||||
return from_serialized(v, type, sf);
|
||||
});
|
||||
}
|
||||
|
||||
lists::value
|
||||
lists::value::from_serialized(bytes_view v, const list_type_impl& type, cql_serialization_format sf) {
|
||||
try {
|
||||
// Collections have this small hack that validate cannot be called on a serialized object,
|
||||
// but compose does the validation (so we're fine).
|
||||
// FIXME: deserializeForNativeProtocol()?!
|
||||
auto l = value_cast<list_type_impl::native_type>(type.deserialize(val, sf));
|
||||
auto l = value_cast<list_type_impl::native_type>(type.deserialize(v, sf));
|
||||
std::vector<bytes_opt> elements;
|
||||
elements.reserve(l.size());
|
||||
for (auto&& element : l) {
|
||||
@@ -220,15 +227,17 @@ lists::delayed_value::bind(const query_options& options) {
|
||||
::shared_ptr<terminal>
|
||||
lists::marker::bind(const query_options& options) {
|
||||
const auto& value = options.get_value_at(_bind_index);
|
||||
auto& ltype = dynamic_cast<const list_type_impl&>(_receiver->type->without_reversed());
|
||||
auto& ltype = static_cast<const list_type_impl&>(*_receiver->type);
|
||||
if (value.is_null()) {
|
||||
return nullptr;
|
||||
} else if (value.is_unset_value()) {
|
||||
return constants::UNSET_VALUE;
|
||||
} else {
|
||||
try {
|
||||
ltype.validate(*value, options.get_cql_serialization_format());
|
||||
return make_shared<lists::value>(value::from_serialized(*value, ltype, options.get_cql_serialization_format()));
|
||||
return with_linearized(*value, [&] (bytes_view v) {
|
||||
ltype.validate(v, options.get_cql_serialization_format());
|
||||
return make_shared<lists::value>(value::from_serialized(v, ltype, options.get_cql_serialization_format()));
|
||||
});
|
||||
} catch (marshal_exception& e) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
format("Exception while binding column {:s}: {:s}", _receiver->name->to_cql_string(), e.what()));
|
||||
@@ -236,6 +245,20 @@ lists::marker::bind(const query_options& options) {
|
||||
}
|
||||
}
|
||||
|
||||
constexpr db_clock::time_point lists::precision_time::REFERENCE_TIME;
|
||||
thread_local lists::precision_time lists::precision_time::_last = {db_clock::time_point::max(), 0};
|
||||
|
||||
lists::precision_time
|
||||
lists::precision_time::get_next(db_clock::time_point millis) {
|
||||
// FIXME: and if time goes backwards?
|
||||
assert(millis <= _last.millis);
|
||||
auto next = millis < _last.millis
|
||||
? precision_time{millis, 9999}
|
||||
: precision_time{millis, std::max(0, _last.nanos - 1)};
|
||||
_last = next;
|
||||
return next;
|
||||
}
|
||||
|
||||
void
|
||||
lists::setter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
auto value = _t->bind(params._options);
|
||||
@@ -285,7 +308,9 @@ lists::setter_by_index::execute(mutation& m, const clustering_key_prefix& prefix
|
||||
return;
|
||||
}
|
||||
|
||||
auto idx = value_cast<int32_t>(data_type_for<int32_t>()->deserialize(*index));
|
||||
auto idx = with_linearized(*index, [] (bytes_view v) {
|
||||
return value_cast<int32_t>(data_type_for<int32_t>()->deserialize(v));
|
||||
});
|
||||
auto&& existing_list_opt = params.get_prefetched_list(m.key(), prefix, column);
|
||||
if (!existing_list_opt) {
|
||||
throw exceptions::invalid_request_exception("Attempted to set an element on a list which is null");
|
||||
@@ -373,18 +398,10 @@ lists::do_append(shared_ptr<term> value,
|
||||
collection_mutation_description appended;
|
||||
appended.cells.reserve(to_add.size());
|
||||
for (auto&& e : to_add) {
|
||||
try {
|
||||
auto uuid1 = utils::UUID_gen::get_time_UUID_bytes_from_micros_and_submicros(
|
||||
params.timestamp(),
|
||||
params._options.next_list_append_seq());
|
||||
auto uuid = bytes(reinterpret_cast<const int8_t*>(uuid1.data()), uuid1.size());
|
||||
// FIXME: can e be empty?
|
||||
appended.cells.emplace_back(
|
||||
std::move(uuid),
|
||||
params.make_cell(*ltype->value_comparator(), *e, atomic_cell::collection_member::yes));
|
||||
} catch (utils::timeuuid_submicro_out_of_range) {
|
||||
throw exceptions::invalid_request_exception("Too many list values per single CQL statement or batch");
|
||||
}
|
||||
auto uuid1 = utils::UUID_gen::get_time_UUID_bytes();
|
||||
auto uuid = bytes(reinterpret_cast<const int8_t*>(uuid1.data()), uuid1.size());
|
||||
// FIXME: can e be empty?
|
||||
appended.cells.emplace_back(std::move(uuid), params.make_cell(*ltype->value_comparator(), *e, atomic_cell::collection_member::yes));
|
||||
}
|
||||
m.set_cell(prefix, column, appended.serialize(*ltype));
|
||||
} else {
|
||||
@@ -408,42 +425,20 @@ lists::prepender::execute(mutation& m, const clustering_key_prefix& prefix, cons
|
||||
|
||||
auto&& lvalue = dynamic_pointer_cast<lists::value>(std::move(value));
|
||||
assert(lvalue);
|
||||
|
||||
// For prepend we need to be able to generate a unique but decreasing
|
||||
// timeuuid. We achieve that by using a time in the past which
|
||||
// is 2x the distance between the original timestamp (it
|
||||
// would be the current timestamp, user supplied timestamp, or
|
||||
// unique monotonic LWT timestamp, whatever is in query
|
||||
// options) and a reference time of Jan 1 2010 00:00:00.
|
||||
// E.g. if query timestamp is Jan 1 2020 00:00:00, the prepend
|
||||
// timestamp will be Jan 1, 2000, 00:00:00.
|
||||
|
||||
// 2010-01-01T00:00:00+00:00 in api::timestamp_time format (microseconds)
|
||||
static constexpr int64_t REFERENCE_TIME_MICROS = 1262304000L * 1000 * 1000;
|
||||
|
||||
int64_t micros = params.timestamp();
|
||||
if (micros > REFERENCE_TIME_MICROS) {
|
||||
micros = REFERENCE_TIME_MICROS - (micros - REFERENCE_TIME_MICROS);
|
||||
} else {
|
||||
// Scylla, unlike Cassandra, respects user-supplied timestamps
|
||||
// in prepend, but there is nothing useful it can do with
|
||||
// a timestamp less than Jan 1, 2010, 00:00:00.
|
||||
throw exceptions::invalid_request_exception("List prepend custom timestamp must be greater than Jan 1 2010 00:00:00");
|
||||
}
|
||||
auto time = precision_time::REFERENCE_TIME - (db_clock::now() - precision_time::REFERENCE_TIME);
|
||||
|
||||
collection_mutation_description mut;
|
||||
mut.cells.reserve(lvalue->get_elements().size());
|
||||
|
||||
// We reverse the order of insertion, so that the last element gets the latest time
|
||||
// (lists are sorted by time)
|
||||
auto ltype = static_cast<const list_type_impl*>(column.type.get());
|
||||
int clockseq = params._options.next_list_prepend_seq(lvalue->_elements.size(), utils::UUID_gen::SUBMICRO_LIMIT);
|
||||
for (auto&& v : lvalue->_elements) {
|
||||
try {
|
||||
auto uuid = utils::UUID_gen::get_time_UUID_bytes_from_micros_and_submicros(micros, clockseq++);
|
||||
mut.cells.emplace_back(bytes(uuid.data(), uuid.size()), params.make_cell(*ltype->value_comparator(), *v, atomic_cell::collection_member::yes));
|
||||
} catch (utils::timeuuid_submicro_out_of_range) {
|
||||
throw exceptions::invalid_request_exception("Too many list values per single CQL statement or batch");
|
||||
}
|
||||
for (auto&& v : lvalue->_elements | boost::adaptors::reversed) {
|
||||
auto&& pt = precision_time::get_next(time);
|
||||
auto uuid = utils::UUID_gen::get_time_UUID_bytes(pt.millis.time_since_epoch().count(), pt.nanos);
|
||||
mut.cells.emplace_back(bytes(uuid.data(), uuid.size()), params.make_cell(*ltype->value_comparator(), *v, atomic_cell::collection_member::yes));
|
||||
}
|
||||
// now reverse again, to get the original order back
|
||||
std::reverse(mut.cells.begin(), mut.cells.end());
|
||||
m.set_cell(prefix, column, mut.serialize(*ltype));
|
||||
}
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
|
||||
#include "cql3/abstract_marker.hh"
|
||||
#include "to_string.hh"
|
||||
#include "utils/UUID_gen.hh"
|
||||
#include "operation.hh"
|
||||
|
||||
namespace cql3 {
|
||||
@@ -72,6 +73,7 @@ public:
|
||||
};
|
||||
|
||||
class value : public multi_item_terminal, collection_terminal {
|
||||
static value from_serialized(bytes_view v, const list_type_impl& type, cql_serialization_format sf);
|
||||
public:
|
||||
std::vector<bytes_opt> _elements;
|
||||
public:
|
||||
@@ -120,6 +122,28 @@ public:
|
||||
virtual ::shared_ptr<terminal> bind(const query_options& options) override;
|
||||
};
|
||||
|
||||
/*
|
||||
* For prepend, we need to be able to generate unique but decreasing time
|
||||
* UUID, which is a bit challenging. To do that, given a time in milliseconds,
|
||||
* we add a number representing the 100-nanosecond precision and make sure
|
||||
* that within the same millisecond, that number is always decreasing. We
|
||||
* do rely on the fact that the user will only provide decreasing
|
||||
* millisecond timestamps for that purpose.
|
||||
*/
|
||||
private:
|
||||
class precision_time {
|
||||
public:
|
||||
// Our reference time (1 jan 2010, 00:00:00) in milliseconds.
|
||||
static constexpr db_clock::time_point REFERENCE_TIME{std::chrono::milliseconds(1262304000000)};
|
||||
private:
|
||||
static thread_local precision_time _last;
|
||||
public:
|
||||
db_clock::time_point millis;
|
||||
int32_t nanos;
|
||||
|
||||
static precision_time get_next(db_clock::time_point millis);
|
||||
};
|
||||
|
||||
public:
|
||||
class setter : public operation {
|
||||
public:
|
||||
|
||||
26
cql3/maps.cc
26
cql3/maps.cc
@@ -55,14 +55,14 @@ lw_shared_ptr<column_specification>
|
||||
maps::key_spec_of(const column_specification& column) {
|
||||
return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
|
||||
::make_shared<column_identifier>(format("key({})", *column.name), true),
|
||||
dynamic_cast<const map_type_impl&>(column.type->without_reversed()).get_keys_type());
|
||||
dynamic_pointer_cast<const map_type_impl>(column.type)->get_keys_type());
|
||||
}
|
||||
|
||||
lw_shared_ptr<column_specification>
|
||||
maps::value_spec_of(const column_specification& column) {
|
||||
return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
|
||||
::make_shared<column_identifier>(format("value({})", *column.name), true),
|
||||
dynamic_cast<const map_type_impl&>(column.type->without_reversed()).get_values_type());
|
||||
dynamic_pointer_cast<const map_type_impl>(column.type)->get_values_type());
|
||||
}
|
||||
|
||||
::shared_ptr<term>
|
||||
@@ -88,9 +88,7 @@ maps::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<colu
|
||||
|
||||
values.emplace(k, v);
|
||||
}
|
||||
delayed_value value(
|
||||
dynamic_cast<const map_type_impl&>(receiver->type->without_reversed()).get_keys_type()->as_less_comparator(),
|
||||
values);
|
||||
delayed_value value(static_pointer_cast<const map_type_impl>(receiver->type)->get_keys_type()->as_less_comparator(), values);
|
||||
if (all_terminal) {
|
||||
return value.bind(query_options::DEFAULT);
|
||||
} else {
|
||||
@@ -100,7 +98,7 @@ maps::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<colu
|
||||
|
||||
void
|
||||
maps::literal::validate_assignable_to(database& db, const sstring& keyspace, const column_specification& receiver) const {
|
||||
if (!receiver.type->without_reversed().is_map()) {
|
||||
if (!dynamic_pointer_cast<const map_type_impl>(receiver.type)) {
|
||||
throw exceptions::invalid_request_exception(format("Invalid map literal for {} of type {}", *receiver.name, receiver.type->as_cql3_type()));
|
||||
}
|
||||
auto&& key_spec = maps::key_spec_of(receiver);
|
||||
@@ -160,13 +158,15 @@ maps::value::from_serialized(const fragmented_temporary_buffer::view& fragmented
|
||||
// Collections have this small hack that validate cannot be called on a serialized object,
|
||||
// but compose does the validation (so we're fine).
|
||||
// FIXME: deserialize_for_native_protocol?!
|
||||
auto m = value_cast<map_type_impl::native_type>(type.deserialize(fragmented_value, sf));
|
||||
return with_linearized(fragmented_value, [&] (bytes_view value) {
|
||||
auto m = value_cast<map_type_impl::native_type>(type.deserialize(value, sf));
|
||||
std::map<bytes, bytes, serialized_compare> map(type.get_keys_type()->as_less_comparator());
|
||||
for (auto&& e : m) {
|
||||
map.emplace(type.get_keys_type()->decompose(e.first),
|
||||
type.get_values_type()->decompose(e.second));
|
||||
}
|
||||
return maps::value { std::move(map) };
|
||||
});
|
||||
} catch (marshal_exception& e) {
|
||||
throw exceptions::invalid_request_exception(e.what());
|
||||
}
|
||||
@@ -263,16 +263,14 @@ maps::marker::bind(const query_options& options) {
|
||||
return constants::UNSET_VALUE;
|
||||
}
|
||||
try {
|
||||
_receiver->type->validate(*val, options.get_cql_serialization_format());
|
||||
with_linearized(*val, [&] (bytes_view value) {
|
||||
_receiver->type->validate(value, options.get_cql_serialization_format());
|
||||
});
|
||||
} catch (marshal_exception& e) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
format("Exception while binding column {:s}: {:s}", _receiver->name->to_cql_string(), e.what()));
|
||||
}
|
||||
return ::make_shared<maps::value>(
|
||||
maps::value::from_serialized(
|
||||
*val,
|
||||
dynamic_cast<const map_type_impl&>(_receiver->type->without_reversed()),
|
||||
options.get_cql_serialization_format()));
|
||||
return ::make_shared<maps::value>(maps::value::from_serialized(*val, static_cast<const map_type_impl&>(*_receiver->type), options.get_cql_serialization_format()));
|
||||
}
|
||||
|
||||
void
|
||||
@@ -310,7 +308,7 @@ maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, c
|
||||
if (value.is_unset_value()) {
|
||||
return;
|
||||
}
|
||||
if (key.is_unset_value()) {
|
||||
if (key.is_unset_value() || value.is_unset_value()) {
|
||||
throw invalid_request_exception("Invalid unset map key");
|
||||
}
|
||||
if (!key) {
|
||||
|
||||
@@ -59,32 +59,30 @@ namespace cql3 {
|
||||
* - SELECT ... WHERE (a, b) IN ?
|
||||
*/
|
||||
class multi_column_relation final : public relation {
|
||||
public:
|
||||
using mode = expr::comparison_order;
|
||||
private:
|
||||
std::vector<shared_ptr<column_identifier::raw>> _entities;
|
||||
shared_ptr<term::multi_column_raw> _values_or_marker;
|
||||
std::vector<shared_ptr<term::multi_column_raw>> _in_values;
|
||||
shared_ptr<tuples::in_raw> _in_marker;
|
||||
mode _mode;
|
||||
|
||||
public:
|
||||
|
||||
multi_column_relation(std::vector<shared_ptr<column_identifier::raw>> entities,
|
||||
expr::oper_t relation_type, shared_ptr<term::multi_column_raw> values_or_marker,
|
||||
std::vector<shared_ptr<term::multi_column_raw>> in_values, shared_ptr<tuples::in_raw> in_marker, mode m = mode::cql)
|
||||
std::vector<shared_ptr<term::multi_column_raw>> in_values, shared_ptr<tuples::in_raw> in_marker)
|
||||
: relation(relation_type)
|
||||
, _entities(std::move(entities))
|
||||
, _values_or_marker(std::move(values_or_marker))
|
||||
, _in_values(std::move(in_values))
|
||||
, _in_marker(std::move(in_marker))
|
||||
, _mode(m)
|
||||
{ }
|
||||
|
||||
static shared_ptr<multi_column_relation> create_multi_column_relation(
|
||||
std::vector<shared_ptr<column_identifier::raw>> entities, expr::oper_t relation_type,
|
||||
shared_ptr<term::multi_column_raw> values_or_marker, std::vector<shared_ptr<term::multi_column_raw>> in_values,
|
||||
shared_ptr<tuples::in_raw> in_marker, mode m = mode::cql) {
|
||||
shared_ptr<tuples::in_raw> in_marker) {
|
||||
return ::make_shared<multi_column_relation>(std::move(entities), relation_type, std::move(values_or_marker),
|
||||
std::move(in_values), std::move(in_marker), m);
|
||||
std::move(in_values), std::move(in_marker));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -101,15 +99,6 @@ public:
|
||||
return create_multi_column_relation(std::move(entities), relation_type, std::move(values_or_marker), {}, {});
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as above, but sets the magic mode that causes us to treat the restrictions as "raw" clustering bounds
|
||||
*/
|
||||
static shared_ptr<multi_column_relation> create_scylla_clustering_bound_non_in_relation(std::vector<shared_ptr<column_identifier::raw>> entities,
|
||||
expr::oper_t relation_type, shared_ptr<term::multi_column_raw> values_or_marker) {
|
||||
assert(relation_type != expr::oper_t::IN);
|
||||
return create_multi_column_relation(std::move(entities), relation_type, std::move(values_or_marker), {}, {}, mode::clustering);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a multi-column IN relation with a list of IN values or markers.
|
||||
* For example: "SELECT ... WHERE (a, b) IN ((0, 1), (2, 3))"
|
||||
@@ -202,7 +191,7 @@ protected:
|
||||
return cs->column_specification;
|
||||
});
|
||||
auto t = to_term(col_specs, *get_value(), db, schema->ks_name(), bound_names);
|
||||
return ::make_shared<restrictions::multi_column_restriction::slice>(schema, rs, bound, inclusive, t, _mode);
|
||||
return ::make_shared<restrictions::multi_column_restriction::slice>(schema, rs, bound, inclusive, t);
|
||||
}
|
||||
|
||||
virtual shared_ptr<restrictions::restriction> new_contains_restriction(database& db, schema_ptr schema,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user