Compare commits
89 Commits
next-4.0
...
branch-3.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fd293768e7 | ||
|
|
22dfa48585 | ||
|
|
2f3d7f1408 | ||
|
|
76a08df939 | ||
|
|
6aa129d3b0 | ||
|
|
b4f781e4eb | ||
|
|
27594ca50e | ||
|
|
0f2f0d65d7 | ||
|
|
31c2f8a3ae | ||
|
|
ec12331f11 | ||
|
|
ccc463b5e5 | ||
|
|
4a9676f6b7 | ||
|
|
aaf4989c31 | ||
|
|
b29f954f20 | ||
|
|
5546d5df7b | ||
|
|
541c29677f | ||
|
|
06f18108c0 | ||
|
|
90002ca3d2 | ||
|
|
da23902311 | ||
|
|
2b0dc21f97 | ||
|
|
b544691493 | ||
|
|
d420b06844 | ||
|
|
b3a2cb2f68 | ||
|
|
c8c057f5f8 | ||
|
|
038bfc925c | ||
|
|
13a4e7db83 | ||
|
|
727d6cf8f3 | ||
|
|
6d6d7b4abe | ||
|
|
28f974b810 | ||
|
|
5fdadcaf3b | ||
|
|
a960394f27 | ||
|
|
3216a1a70a | ||
|
|
5a7fd41618 | ||
|
|
dd24ba7a62 | ||
|
|
204f6dd393 | ||
|
|
b1278adc15 | ||
|
|
ee9677ef71 | ||
|
|
2060e361cf | ||
|
|
6f939ffe19 | ||
|
|
69105bde8a | ||
|
|
e09e9a5929 | ||
|
|
2308bdbccb | ||
|
|
a2d39c9a2e | ||
|
|
5fe2ce3bbe | ||
|
|
aafa34bbad | ||
|
|
7ae2cdf46c | ||
|
|
863f88c067 | ||
|
|
90b4e9e595 | ||
|
|
434ad4548f | ||
|
|
cbbb15af5c | ||
|
|
3231580c05 | ||
|
|
62364d9dcd | ||
|
|
3bed8063f6 | ||
|
|
413fcab833 | ||
|
|
9f3c3036bf | ||
|
|
ff2e108a6d | ||
|
|
ade788ffe8 | ||
|
|
1f8bb754d9 | ||
|
|
7b2eb09225 | ||
|
|
d2293f9fd5 | ||
|
|
25b31f6c23 | ||
|
|
742a1ce7d6 | ||
|
|
4ca9d23b83 | ||
|
|
9e97f3a9b3 | ||
|
|
183418f228 | ||
|
|
756574d094 | ||
|
|
a348418918 | ||
|
|
06c0bd0681 | ||
|
|
223c300435 | ||
|
|
ac8bef6781 | ||
|
|
68691907af | ||
|
|
f59d2fcbf1 | ||
|
|
bdc542143e | ||
|
|
061a02237c | ||
|
|
35b6505517 | ||
|
|
866c04dd64 | ||
|
|
dc588e6e7b | ||
|
|
f842154453 | ||
|
|
b38193f71d | ||
|
|
f47ba6dc06 | ||
|
|
0d0c1d4318 | ||
|
|
9225b17b99 | ||
|
|
00b3f28199 | ||
|
|
1bbe619689 | ||
|
|
c36f71c783 | ||
|
|
f5471d268b | ||
|
|
fd5c65d9dc | ||
|
|
3aa406bf00 | ||
|
|
c0253d9221 |
@@ -1,4 +1,3 @@
|
||||
.git
|
||||
build
|
||||
seastar/build
|
||||
testlog
|
||||
|
||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -15,6 +15,3 @@
|
||||
[submodule "zstd"]
|
||||
path = zstd
|
||||
url = ../zstd
|
||||
[submodule "abseil"]
|
||||
path = abseil
|
||||
url = ../abseil-cpp
|
||||
|
||||
@@ -5,25 +5,13 @@
|
||||
cmake_minimum_required(VERSION 3.7)
|
||||
project(scylla)
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
message(STATUS "Setting build type to 'Release' as none was specified.")
|
||||
set(CMAKE_BUILD_TYPE "Release" CACHE
|
||||
STRING "Choose the type of build." FORCE)
|
||||
# Set the possible values of build type for cmake-gui
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
|
||||
"Debug" "Release" "Dev" "Sanitize")
|
||||
endif()
|
||||
|
||||
if(CMAKE_BUILD_TYPE)
|
||||
string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_TYPE)
|
||||
else()
|
||||
set(BUILD_TYPE "release")
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED FOR_IDE AND NOT DEFINED ENV{FOR_IDE} AND NOT DEFINED ENV{CLION_IDE})
|
||||
message(FATAL_ERROR "This CMakeLists.txt file is only valid for use in IDEs, please define FOR_IDE to acknowledge this.")
|
||||
endif()
|
||||
|
||||
# Default value. A more accurate list is populated through `pkg-config` below if `seastar.pc` is available.
|
||||
set(SEASTAR_INCLUDE_DIRS "seastar")
|
||||
|
||||
# These paths are always available, since they're included in the repository. Additional DPDK headers are placed while
|
||||
# Seastar is built, and are captured in `SEASTAR_INCLUDE_DIRS` through parsing the Seastar pkg-config file (below).
|
||||
set(SEASTAR_DPDK_INCLUDE_DIRS
|
||||
@@ -34,14 +22,9 @@ set(SEASTAR_DPDK_INCLUDE_DIRS
|
||||
|
||||
find_package(PkgConfig REQUIRED)
|
||||
|
||||
set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/build/${BUILD_TYPE}/seastar:$ENV{PKG_CONFIG_PATH}")
|
||||
set(ENV{PKG_CONFIG_PATH} "${CMAKE_SOURCE_DIR}/seastar/build/release:$ENV{PKG_CONFIG_PATH}")
|
||||
pkg_check_modules(SEASTAR seastar)
|
||||
|
||||
if(NOT SEASTAR_INCLUDE_DIRS)
|
||||
# Default value. A more accurate list is populated through `pkg-config` below if `seastar.pc` is available.
|
||||
set(SEASTAR_INCLUDE_DIRS "seastar/include")
|
||||
endif()
|
||||
|
||||
find_package(Boost COMPONENTS filesystem program_options system thread)
|
||||
|
||||
##
|
||||
@@ -87,7 +70,7 @@ scan_scylla_source_directories(
|
||||
seastar/json
|
||||
seastar/net
|
||||
seastar/rpc
|
||||
seastar/testing
|
||||
seastar/tests
|
||||
seastar/util)
|
||||
|
||||
scan_scylla_source_directories(
|
||||
@@ -123,7 +106,7 @@ scan_scylla_source_directories(
|
||||
scan_scylla_source_directories(
|
||||
VAR SCYLLA_GEN_SOURCE_FILES
|
||||
RECURSIVE
|
||||
PATHS build/${BUILD_TYPE}/gen)
|
||||
PATHS build/release/gen)
|
||||
|
||||
set(SCYLLA_SOURCE_FILES
|
||||
${SCYLLA_ROOT_SOURCE_FILES}
|
||||
@@ -156,4 +139,4 @@ target_include_directories(scylla PUBLIC
|
||||
${Boost_INCLUDE_DIRS}
|
||||
xxhash
|
||||
libdeflate
|
||||
build/${BUILD_TYPE}/gen)
|
||||
build/release/gen)
|
||||
|
||||
@@ -141,7 +141,7 @@ In v3:
|
||||
"Tests: unit ({mode}), dtest ({smp})"
|
||||
```
|
||||
|
||||
The usual is "Tests: unit (dev)", although running debug tests is encouraged.
|
||||
The usual is "Tests: unit (release)", although running debug tests is encouraged.
|
||||
|
||||
5. When answering review comments, prefer inline quotes as they make it easier to track the conversation across multiple e-mails.
|
||||
|
||||
|
||||
33
README.md
33
README.md
@@ -38,10 +38,6 @@ Please see [HACKING.md](HACKING.md) for detailed information on building and dev
|
||||
./build/release/scylla --help
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
See [test.py manual](docs/testing.md).
|
||||
|
||||
## Scylla APIs and compatibility
|
||||
By default, Scylla is compatible with Apache Cassandra and its APIs - CQL and
|
||||
Thrift. There is also experimental support for the API of Amazon DynamoDB,
|
||||
@@ -60,12 +56,31 @@ both.
|
||||
Seastar documentation can be found [here](http://docs.seastar.io/master/index.html).
|
||||
User documentation can be found [here](https://docs.scylladb.com/).
|
||||
|
||||
## Training
|
||||
## Building Fedora RPM
|
||||
|
||||
Training material and online courses can be found at [Scylla University](https://university.scylladb.com/).
|
||||
The courses are free, self-paced and include hands-on examples. They cover a variety of topics including Scylla data modeling,
|
||||
administration, architecture, basic NoSQL concepts, using drivers for application development, Scylla setup, failover, compactions,
|
||||
multi-datacenters and how Scylla integrates with third-party applications.
|
||||
As a pre-requisite, you need to install [Mock](https://fedoraproject.org/wiki/Mock) on your machine:
|
||||
|
||||
```
|
||||
# Install mock:
|
||||
sudo yum install mock
|
||||
|
||||
# Add user to the "mock" group:
|
||||
usermod -a -G mock $USER && newgrp mock
|
||||
```
|
||||
|
||||
Then, to build an RPM, run:
|
||||
|
||||
```
|
||||
./dist/redhat/build_rpm.sh
|
||||
```
|
||||
|
||||
The built RPM is stored in ``/var/lib/mock/<configuration>/result`` directory.
|
||||
For example, on Fedora 21 mock reports the following:
|
||||
|
||||
```
|
||||
INFO: Done(scylla-server-0.00-1.fc21.src.rpm) Config(default) 20 minutes 7 seconds
|
||||
INFO: Results and/or logs in: /var/lib/mock/fedora-21-x86_64/result
|
||||
```
|
||||
|
||||
## Building Fedora-based Docker image
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
PRODUCT=scylla
|
||||
VERSION=4.0.11
|
||||
VERSION=3.3.4
|
||||
|
||||
if test -f version
|
||||
then
|
||||
@@ -19,14 +19,6 @@ else
|
||||
SCYLLA_RELEASE=$SCYLLA_BUILD.$DATE.$GIT_COMMIT
|
||||
fi
|
||||
|
||||
if [ -f build/SCYLLA-RELEASE-FILE ]; then
|
||||
RELEASE_FILE=$(cat build/SCYLLA-RELEASE-FILE)
|
||||
GIT_COMMIT_FILE=$(cat build/SCYLLA-RELEASE-FILE |cut -d . -f 3)
|
||||
if [ "$GIT_COMMIT" = "$GIT_COMMIT_FILE" ]; then
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "$SCYLLA_VERSION-$SCYLLA_RELEASE"
|
||||
mkdir -p build
|
||||
echo "$SCYLLA_VERSION" > build/SCYLLA-VERSION-FILE
|
||||
|
||||
1
abseil
1
abseil
Submodule abseil deleted from 2069dc796a
@@ -26,14 +26,6 @@ import pytest
|
||||
import boto3
|
||||
from util import create_test_table
|
||||
|
||||
# When tests are run with HTTPS, the server often won't have its SSL
|
||||
# certificate signed by a known authority. So we will disable certificate
|
||||
# verification with the "verify=False" request option. However, once we do
|
||||
# that, we start getting scary-looking warning messages, saying that this
|
||||
# makes HTTPS insecure. The following silences those warnings:
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
# Test that the Boto libraries are new enough. These tests want to test a
|
||||
# large variety of DynamoDB API features, and to do this we need a new-enough
|
||||
# version of the the Boto libraries (boto3 and botocore) so that they can
|
||||
@@ -54,8 +46,6 @@ def pytest_addoption(parser):
|
||||
parser.addoption("--https", action="store_true",
|
||||
help="communicate via HTTPS protocol on port 8043 instead of HTTP when"
|
||||
" running against a local Scylla installation")
|
||||
parser.addoption("--url", action="store",
|
||||
help="communicate with given URL instead of defaults")
|
||||
|
||||
# "dynamodb" fixture: set up client object for communicating with the DynamoDB
|
||||
# API. Currently this chooses either Amazon's DynamoDB in the default region
|
||||
@@ -72,15 +62,15 @@ def dynamodb(request):
|
||||
# requires us to specify dummy region and credential parameters,
|
||||
# otherwise the user is forced to properly configure ~/.aws even
|
||||
# for local runs.
|
||||
if request.config.getoption('url') != None:
|
||||
local_url = request.config.getoption('url')
|
||||
else:
|
||||
local_url = 'https://localhost:8043' if request.config.getoption('https') else 'http://localhost:8000'
|
||||
local_url = 'https://localhost:8043' if request.config.getoption('https') else 'http://localhost:8000'
|
||||
# Disable verifying in order to be able to use self-signed TLS certificates
|
||||
verify = not request.config.getoption('https')
|
||||
# Silencing the 'Unverified HTTPS request warning'
|
||||
if request.config.getoption('https'):
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
return boto3.resource('dynamodb', endpoint_url=local_url, verify=verify,
|
||||
region_name='us-east-1', aws_access_key_id='alternator', aws_secret_access_key='secret_pass',
|
||||
config=botocore.client.Config(retries={"max_attempts": 3}))
|
||||
region_name='us-east-1', aws_access_key_id='alternator', aws_secret_access_key='secret_pass')
|
||||
|
||||
# "test_table" fixture: Create and return a temporary table to be used in tests
|
||||
# that need a table to work on. The table is automatically deleted at the end.
|
||||
@@ -125,15 +115,6 @@ def test_table_s(dynamodb):
|
||||
AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'S' } ])
|
||||
yield table
|
||||
table.delete()
|
||||
# test_table_s_2 has exactly the same schema as test_table_s, and is useful
|
||||
# for tests which need two different tables with the same schema.
|
||||
@pytest.fixture(scope="session")
|
||||
def test_table_s_2(dynamodb):
|
||||
table = create_test_table(dynamodb,
|
||||
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }, ],
|
||||
AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'S' } ])
|
||||
yield table
|
||||
table.delete()
|
||||
@pytest.fixture(scope="session")
|
||||
def test_table_b(dynamodb):
|
||||
table = create_test_table(dynamodb,
|
||||
@@ -196,11 +177,3 @@ def filled_test_table(dynamodb):
|
||||
|
||||
yield table, items
|
||||
table.delete()
|
||||
|
||||
# The "scylla_only" fixture can be used by tests for Scylla-only features,
|
||||
# which do not exist on AWS DynamoDB. A test using this fixture will be
|
||||
# skipped if running with "--aws".
|
||||
@pytest.fixture(scope="session")
|
||||
def scylla_only(dynamodb):
|
||||
if dynamodb.meta.client._endpoint.host.endswith('.amazonaws.com'):
|
||||
pytest.skip('Scylla-only feature not supported by AWS')
|
||||
@@ -59,18 +59,6 @@ def test_expired_signature(dynamodb, test_table):
|
||||
assert not response.ok
|
||||
assert "InvalidSignatureException" in response.text and "Signature expired" in response.text
|
||||
|
||||
# A test verifying that missing Authorization header is handled properly
|
||||
def test_no_authorization_header(dynamodb, test_table):
|
||||
url = dynamodb.meta.client._endpoint.host
|
||||
print(url)
|
||||
headers = {'Content-Type': 'application/x-amz-json-1.0',
|
||||
'X-Amz-Date': '20170101T010101Z',
|
||||
'X-Amz-Target': 'DynamoDB_20120810.DescribeEndpoints',
|
||||
}
|
||||
response = requests.post(url, headers=headers, verify=False)
|
||||
assert not response.ok
|
||||
assert "InvalidSignatureException" in response.text and "Authorization header" in response.text
|
||||
|
||||
# A test ensuring that signatures that exceed current time too much are not accepted.
|
||||
# Watch out - this test is valid only for around next 1000 years, it needs to be updated later.
|
||||
def test_signature_too_futuristic(dynamodb, test_table):
|
||||
@@ -20,7 +20,6 @@
|
||||
# so they are actually tested by other tests as well.
|
||||
|
||||
import pytest
|
||||
import random
|
||||
from botocore.exceptions import ClientError
|
||||
from util import random_string, full_scan, full_query, multiset
|
||||
|
||||
@@ -45,19 +44,6 @@ def test_basic_batch_write_item(test_table):
|
||||
assert item['attribute'] == str(i)
|
||||
assert item['another'] == 'xyz'
|
||||
|
||||
# Try a batch which includes both multiple writes to the same partition
|
||||
# and several partitions. The LWT code collects multiple mutations to the
|
||||
# same partition together, and we want to test that this worked correctly.
|
||||
def test_batch_write_item_mixed(test_table):
|
||||
partitions = [random_string() for i in range(4)]
|
||||
items = [{'p': p, 'c': str(i)} for p in partitions for i in range(4)]
|
||||
with test_table.batch_writer() as batch:
|
||||
# Reorder items randomly, just for the heck of it
|
||||
for item in random.sample(items, len(items)):
|
||||
batch.put_item(item)
|
||||
for item in items:
|
||||
assert test_table.get_item(Key={'p': item['p'], 'c': item['c']}, ConsistentRead=True)['Item'] == item
|
||||
|
||||
# Test batch write to a table with only a hash key
|
||||
def test_batch_write_hash_only(test_table_s):
|
||||
items = [{'p': random_string(), 'val': random_string()} for i in range(10)]
|
||||
@@ -152,20 +138,6 @@ def test_batch_write_duplicate_write_and_delete(test_table_s, test_table):
|
||||
batch.put_item({'p': p, 'c': other})
|
||||
batch.put_item({'p': other, 'c': c})
|
||||
|
||||
# The BatchWriteIem API allows writing to more than one table in the same
|
||||
# batch. This test verifies that the duplicate-key checking doesn't mistake
|
||||
# updates to the same key in different tables to be duplicates.
|
||||
def test_batch_write_nonduplicate_multiple_tables(test_table_s, test_table_s_2):
|
||||
p = random_string()
|
||||
# The batch_writer() function used in previous tests can't write to more
|
||||
# than one table. So we use the lower level interface boto3 gives us.
|
||||
reply = test_table_s.meta.client.batch_write_item(RequestItems = {
|
||||
test_table_s.name: [{'PutRequest': {'Item': {'p': p, 'a': 'hi'}}}],
|
||||
test_table_s_2.name: [{'PutRequest': {'Item': {'p': p, 'b': 'hello'}}}]
|
||||
})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': 'hi'}
|
||||
assert test_table_s_2.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'b': 'hello'}
|
||||
|
||||
# Test that BatchWriteItem's PutRequest completely replaces an existing item.
|
||||
# It shouldn't merge it with a previously existing value. See also the same
|
||||
# test for PutItem - test_put_item_replace().
|
||||
@@ -210,32 +182,6 @@ def test_batch_write_invalid_operation(test_table_s):
|
||||
for p in [p1, p2]:
|
||||
assert not 'item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)
|
||||
|
||||
# In test_item.py we have a bunch of test_empty_* tests on different ways to
|
||||
# create an empty item (which in Scylla requires the special CQL row marker
|
||||
# to be supported correctly). BatchWriteItems provides yet another way of
|
||||
# creating items, so check the empty case here too:
|
||||
def test_empty_batch_write(test_table):
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
with test_table.batch_writer() as batch:
|
||||
batch.put_item({'p': p, 'c': c})
|
||||
assert test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item'] == {'p': p, 'c': c}
|
||||
|
||||
# Test that BatchWriteItems allows writing to multiple tables in one operation
|
||||
def test_batch_write_multiple_tables(test_table_s, test_table):
|
||||
p1 = random_string()
|
||||
c1 = random_string()
|
||||
p2 = random_string()
|
||||
# We use the low-level batch_write_item API for lack of a more convenient
|
||||
# API (the batch_writer() API can only write to one table). At least it
|
||||
# spares us the need to encode the key's types...
|
||||
reply = test_table.meta.client.batch_write_item(RequestItems = {
|
||||
test_table.name: [{'PutRequest': {'Item': {'p': p1, 'c': c1, 'a': 'hi'}}}],
|
||||
test_table_s.name: [{'PutRequest': {'Item': {'p': p2, 'b': 'hello'}}}]
|
||||
})
|
||||
assert test_table.get_item(Key={'p': p1, 'c': c1}, ConsistentRead=True)['Item'] == {'p': p1, 'c': c1, 'a': 'hi'}
|
||||
assert test_table_s.get_item(Key={'p': p2}, ConsistentRead=True)['Item'] == {'p': p2, 'b': 'hello'}
|
||||
|
||||
# Basic test for BatchGetItem, reading several entire items.
|
||||
# Schema has both hash and sort keys.
|
||||
def test_batch_get_item(test_table):
|
||||
@@ -305,16 +251,3 @@ def test_batch_get_item_projection_expression(test_table):
|
||||
got_items = reply['Responses'][test_table.name]
|
||||
expected_items = [{k: item[k] for k in wanted if k in item} for item in items]
|
||||
assert multiset(got_items) == multiset(expected_items)
|
||||
|
||||
# Test that we return the required UnprocessedKeys/UnprocessedItems parameters
|
||||
def test_batch_unprocessed(test_table_s):
|
||||
p = random_string()
|
||||
write_reply = test_table_s.meta.client.batch_write_item(RequestItems = {
|
||||
test_table_s.name: [{'PutRequest': {'Item': {'p': p, 'a': 'hi'}}}],
|
||||
})
|
||||
assert 'UnprocessedItems' in write_reply and write_reply['UnprocessedItems'] == dict()
|
||||
|
||||
read_reply = test_table_s.meta.client.batch_get_item(RequestItems = {
|
||||
test_table_s.name: {'Keys': [{'p': p}], 'ProjectionExpression': 'p, a', 'ConsistentRead': True}
|
||||
})
|
||||
assert 'UnprocessedKeys' in read_reply and read_reply['UnprocessedKeys'] == dict()
|
||||
@@ -22,36 +22,9 @@
|
||||
# test_condition_expression.py. Many of the tests there are very similar to
|
||||
# the ones included here.
|
||||
|
||||
# NOTE: In this file, we use the b'xyz' syntax to represent DynamoDB's binary
|
||||
# values. This syntax works as expected only in Python3. In Python2 it
|
||||
# appears to work, but the "b" is actually ignored and the result is a normal
|
||||
# string 'xyz'. That means that we end up testing the string type instead of
|
||||
# the binary type as intended. So this test can run on Python2 but doesn't
|
||||
# cover testing binary types. The test should be run in Python3 to ensure full
|
||||
# coverage.
|
||||
|
||||
import pytest
|
||||
from botocore.exceptions import ClientError
|
||||
from util import random_string
|
||||
from sys import version_info
|
||||
|
||||
# A helper function for changing write isolation policies
|
||||
def set_write_isolation(table, isolation):
|
||||
got = table.meta.client.describe_table(TableName=table.name)['Table']
|
||||
arn = got['TableArn']
|
||||
tags = [
|
||||
{
|
||||
'Key': 'system:write_isolation',
|
||||
'Value': isolation
|
||||
}
|
||||
]
|
||||
table.meta.client.tag_resource(ResourceArn=arn, Tags=tags)
|
||||
|
||||
# A helper function to clear previous isolation tags
|
||||
def clear_write_isolation(table):
|
||||
got = table.meta.client.describe_table(TableName=table.name)['Table']
|
||||
arn = got['TableArn']
|
||||
table.meta.client.untag_resource(ResourceArn=arn, TagKeys=['system:write_isolation'])
|
||||
|
||||
# Most of the tests in this file check that the ConditionExpression
|
||||
# parameter works for the UpdateItem operation. It should also work the
|
||||
@@ -88,6 +61,7 @@ def test_condition_expression_attribute_updates(test_table_s):
|
||||
# attribute from the request, and the case of comparing two different
|
||||
# attributes of the same item (the latter case wasn't possible to express
|
||||
# with Expected, and becomes possible with ConditionExpression).
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_eq_success(test_table_s):
|
||||
p = random_string()
|
||||
values = (1, "hello", True, b'xyz', None, ['hello', 42], {'hello': 'world'}, set(['hello', 'world']), set([1, 2, 3]), set([b'xyz', b'hi']))
|
||||
@@ -111,6 +85,7 @@ def test_update_condition_eq_success(test_table_s):
|
||||
|
||||
# Comparing values of *different* types should always fail. Check all the
|
||||
# combination of different types.
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_eq_different(test_table_s):
|
||||
p = random_string()
|
||||
values = (1, "hello", True, b'xyz', None, ['hello', 42], {'hello': 'world'}, set(['hello', 'world']), set([1, 2, 3]), set([b'xyz', b'hi']))
|
||||
@@ -137,6 +112,7 @@ def test_update_condition_eq_different(test_table_s):
|
||||
ExpressionAttributeValues={':val1': val1, ':val2': val2})
|
||||
|
||||
# Also check an actual case of same time, but inequality.
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_eq_unequal(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -150,6 +126,7 @@ def test_update_condition_eq_unequal(test_table_s):
|
||||
# Check that set equality is checked correctly. Unlike string equality (for
|
||||
# example), it cannot be done with just naive string comparison of the JSON
|
||||
# representation, and we need to allow for any order. (see issue #5021)
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_eq_set(test_table_s):
|
||||
p = random_string()
|
||||
# Because boto3 sorts the set values we give it, in order to generate a
|
||||
@@ -169,6 +146,7 @@ def test_update_condition_eq_set(test_table_s):
|
||||
assert 'b' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
|
||||
|
||||
# Test for ConditionExpression with operator "<>" (non-equality),
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_ne(test_table_s):
|
||||
p = random_string()
|
||||
# We only check here one type of attributes (numbers), assuming that the
|
||||
@@ -209,6 +187,7 @@ def test_update_condition_ne(test_table_s):
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 3
|
||||
|
||||
# Test for ConditionExpression with operator "<"
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_lt(test_table_s):
|
||||
p = random_string()
|
||||
# The < operator should work for string, number and binary types
|
||||
@@ -281,6 +260,7 @@ def test_update_condition_lt(test_table_s):
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
|
||||
|
||||
# Test for ConditionExpression with operator "<="
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_le(test_table_s):
|
||||
p = random_string()
|
||||
# The <= operator should work for string, number and binary types
|
||||
@@ -344,6 +324,7 @@ def test_update_condition_le(test_table_s):
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7
|
||||
|
||||
# Test for ConditionExpression with operator ">"
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_gt(test_table_s):
|
||||
p = random_string()
|
||||
# The > operator should work for string, number and binary types
|
||||
@@ -407,6 +388,7 @@ def test_update_condition_gt(test_table_s):
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
|
||||
|
||||
# Test for ConditionExpression with operator ">="
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_ge(test_table_s):
|
||||
p = random_string()
|
||||
# The >= operator should work for string, number and binary types
|
||||
@@ -472,6 +454,7 @@ def test_update_condition_ge(test_table_s):
|
||||
# Test for ConditionExpression with ternary operator "BETWEEN" (checking
|
||||
# if a value is between two others, equality included). The keywords
|
||||
# "BETWEEN" and "AND" are case insensitive.
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_between(test_table_s):
|
||||
p = random_string()
|
||||
# The BETWEEN operator should work for string, number and binary types
|
||||
@@ -553,6 +536,7 @@ def test_update_condition_between(test_table_s):
|
||||
# Test for ConditionExpression with multi-operand operator "IN", checking
|
||||
# whether a value is equal to one of possibly many values (up to 100 should
|
||||
# be supported, according to the DynamoDB documentation).
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_in(test_table_s):
|
||||
p = random_string()
|
||||
|
||||
@@ -599,12 +583,6 @@ def test_update_condition_in(test_table_s):
|
||||
ConditionExpression='a IN (:x, :y)',
|
||||
ExpressionAttributeValues={':val': 1, ':x': 'dog', ':y': 174})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 1
|
||||
# IN with zero arguments results in a syntax error, not a failed condition
|
||||
with pytest.raises(ClientError, match='ValidationException.*yntax error'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET c = :val37',
|
||||
ConditionExpression='a IN ()',
|
||||
ExpressionAttributeValues=values)
|
||||
|
||||
# Beyond the above operators, there are also test functions supported -
|
||||
# attribute_exists, attribute_not_exists, attribute_type, begins_with,
|
||||
@@ -612,6 +590,7 @@ def test_update_condition_in(test_table_s):
|
||||
# These functions are listed and described in
|
||||
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.OperatorsAndFunctions.html
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_attribute_exists(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -626,34 +605,8 @@ def test_update_condition_attribute_exists(test_table_s):
|
||||
UpdateExpression='SET c = :val',
|
||||
ConditionExpression='attribute_exists (z)',
|
||||
ExpressionAttributeValues={':val': 3})
|
||||
# Somewhat artificially, attribute_exists() requires that its parameter
|
||||
# be a path - it cannot be a different sort of value.
|
||||
with pytest.raises(ClientError, match='ValidationException.*path'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET c = :val',
|
||||
ConditionExpression='attribute_exists (:val)',
|
||||
ExpressionAttributeValues={':val': 3})
|
||||
|
||||
# Primitive conditions usually look like an operator between two (<, <=,
|
||||
# etc.), three (BETWEEN) or more (IN) values. Can just a single value be
|
||||
# a condition? The special case of a single function call *can* be - we saw
|
||||
# an example attribute_exists(z) in the previous test. However that only
|
||||
# function calls are supported in this context - not general values (i.e.,
|
||||
# attribute or value references).
|
||||
# While DynamoDB does not accept a non-function-call value as a condition
|
||||
# (it results with with a syntax error), in Alternator currently, for
|
||||
# simplicity of the parser, this case is parsed correctly and only fails
|
||||
# later when the calculated value ends up to not be a boolean.
|
||||
def test_update_condition_single_value_attribute(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'}})
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET c = :val',
|
||||
ConditionExpression='a',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_attribute_not_exists(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -669,6 +622,7 @@ def test_update_condition_attribute_not_exists(test_table_s):
|
||||
ConditionExpression='attribute_not_exists (a)',
|
||||
ExpressionAttributeValues={':val': 3})
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_attribute_type(test_table_s):
|
||||
p = random_string()
|
||||
type_values = [
|
||||
@@ -686,10 +640,6 @@ def test_update_condition_attribute_type(test_table_s):
|
||||
test_table_s.update_item(Key={'p': p}, AttributeUpdates=updates)
|
||||
for i in range(len(type_values)):
|
||||
expected_type = type_values[i][0]
|
||||
# As explained in a comment in the top of the file, the binary types
|
||||
# cannot be tested with Python 2
|
||||
if expected_type in ('B', 'BS') and version_info[0] == 2:
|
||||
continue
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET c = :val',
|
||||
ConditionExpression='attribute_type (a{}, :type)'.format(i),
|
||||
@@ -701,32 +651,25 @@ def test_update_condition_attribute_type(test_table_s):
|
||||
UpdateExpression='SET c = :val',
|
||||
ConditionExpression='attribute_type (a{}, :type)'.format(i),
|
||||
ExpressionAttributeValues={':val': i, ':type': wrong_type})
|
||||
# The DynamoDB documentation suggests that attribute_type()'s first
|
||||
# parameter must be a path (as we saw above, this is indeed the case for
|
||||
# attribute_exists()). But in fact, attribute_type() does work fine also
|
||||
# for an expression attribute.
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET c = :val',
|
||||
ConditionExpression='attribute_type (:val, :type)',
|
||||
ExpressionAttributeValues={':val': 0, ':type': 'N'})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 0
|
||||
|
||||
# The DynamoDB documentation explicitly states that the second argument
|
||||
# of the attribute_type function - the type to compare to - *must* be an
|
||||
# expression attribute (:name) - it cannot be an item attribute.
|
||||
# I don't know why this was important to forbid, but this test confirms that
|
||||
# DynamoDB does forbid it.
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_attribute_type_second_arg(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
|
||||
'b': {'Value': 'N', 'Action': 'PUT'}})
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
with pytest.raises(ClientError, match='ValidationException.*Incorrect'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET c = :val',
|
||||
ConditionExpression='attribute_type (a, b)',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_begins_with(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -769,11 +712,12 @@ def test_update_condition_begins_with(test_table_s):
|
||||
ConditionExpression='begins_with(c, a)',
|
||||
ExpressionAttributeValues={':val': 3})
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_contains(test_table_s):
|
||||
p = random_string()
|
||||
# contains() can be used for two unrelated things: check substring (in
|
||||
# string or binary) and membership (in set or a list). The DynamoDB
|
||||
# documentation only mention string and set (not binary or list) but
|
||||
# documentation only bention string and set (not binary or list) but
|
||||
# the fact is that binary and list are also support.
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'a': {'Value': 'hello', 'Action': 'PUT'},
|
||||
@@ -808,19 +752,11 @@ def test_update_condition_contains(test_table_s):
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='contains(d, :arg)',
|
||||
ExpressionAttributeValues={':val': 4, ':arg': b'dog'})
|
||||
|
||||
|
||||
# While both operands of contains() may be item attributes, strangely
|
||||
# it is explicitly forbidden to have the same attribute as both and
|
||||
# trying to do so results in a ValidationException. I don't know why it's
|
||||
# important to make this query fail, when it could have just worked...
|
||||
# TODO: Is this limitation only for contains() or other functions as well?
|
||||
@pytest.mark.xfail(reason="extra check for same attribute not implemented yet")
|
||||
def test_update_condition_contains_same_attribute(test_table_s):
|
||||
p = random_string()
|
||||
# While both operands of contains may be item attributes, strangely
|
||||
# it is explicitly forbidden to have the same attribute as both and
|
||||
# trying to do so results in a ValidationException.
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'a1': {'Value': 'hello', 'Action': 'PUT'},
|
||||
'a': {'Value': 'hello', 'Action': 'PUT'}})
|
||||
AttributeUpdates={'a1': {'Value': 'hello', 'Action': 'PUT'}})
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='contains(a, a1)',
|
||||
@@ -838,6 +774,7 @@ def test_update_condition_contains_same_attribute(test_table_s):
|
||||
# function whose return value needs to be further combined with another
|
||||
# operand using a comparison operation - and it isn't specified which is
|
||||
# supported.
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_size(test_table_s):
|
||||
p = random_string()
|
||||
# First verify what size() returns for various types. We use only the
|
||||
@@ -847,7 +784,7 @@ def test_update_condition_size(test_table_s):
|
||||
'b': {'Value': set([2, 4, 7]), 'Action': 'PUT'},
|
||||
'c': {'Value': [2, 'dog', 7], 'Action': 'PUT'},
|
||||
'd': {'Value': b'hi there', 'Action': 'PUT'},
|
||||
'e': {'Value': {'x': 2, 'y': {'m': 3, 'n': 4}}, 'Action': 'PUT'},
|
||||
'e': {'Value': {'x': 2, 'y': 3}, 'Action': 'PUT'},
|
||||
'f': {'Value': 5, 'Action': 'PUT'},
|
||||
'g': {'Value': True, 'Action': 'PUT'},
|
||||
'h': {'Value': None, 'Action': 'PUT'}})
|
||||
@@ -932,70 +869,6 @@ def test_update_condition_size(test_table_s):
|
||||
ConditionExpression='size(a)>=:arg',
|
||||
ExpressionAttributeValues={':val': 11, ':arg': 2})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 11
|
||||
# size() is only allowed one operand; More operands are allowed by the
|
||||
# parser, but later result in an error:
|
||||
with pytest.raises(ClientError, match='ValidationException.*2'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='size(a, a)=:arg',
|
||||
ExpressionAttributeValues={':val': 1, ':arg': 5})
|
||||
|
||||
# The above test tested conditions involving size() in a comparison.
|
||||
# Trying to use just size(a) as a condition (as we use the rest of the
|
||||
# functions supported by ConditionExpression) does not work - DynamoDB
|
||||
# reports # that "The function is not allowed to be used this way in an
|
||||
# expression; function: size".
|
||||
def test_update_condition_size_alone(test_table_s):
|
||||
p = random_string()
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='size(a)',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
|
||||
# Similarly, while attribute_exists(a) works alone, it cannot be used in
|
||||
# a comparison, e.g., attribute_exists(a) < 1 also causes DynamoDB to
|
||||
# complain about "The function is not allowed to be used in this way in an
|
||||
# expression.".
|
||||
def test_update_condition_attribute_exists_in_comparison(test_table_s):
|
||||
p = random_string()
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='attribute_exists(a) < :val',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
|
||||
# In essense, the size() function tested in the previous test behaves
|
||||
# exactly like the functions of UpdateExpressions, i.e., it transforms a
|
||||
# value (attribute from the item or the query) into a new value, which
|
||||
# can than be operated (in our case, compared). In this test we check
|
||||
# that other functions supported by UpdateExpression - if_not_exists()
|
||||
# and list_append() - are not supported.
|
||||
def test_update_condition_other_funcs(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'a': {'Value': 'hello', 'Action': 'PUT'}})
|
||||
# dog() is an unknown function name:
|
||||
with pytest.raises(ClientError, match='ValidationException.*function'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='dog(a)=:arg',
|
||||
ExpressionAttributeValues={':val': 1, ':arg': 5})
|
||||
# The functions if_not_exists() and list_append() are known functions
|
||||
# (they are supported in UpdateExpression) but not allowed in
|
||||
# ConditionExpression. This means we can have a single function for
|
||||
# evaluation a parsed::value, but it needs to know whether it is
|
||||
# called for a UpdateExpression or a ConditionExpression.
|
||||
with pytest.raises(ClientError, match='ValidationException.*not allowed'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='if_not_exists(a, a)=:arg',
|
||||
ExpressionAttributeValues={':val': 1, ':arg': 5})
|
||||
with pytest.raises(ClientError, match='ValidationException.*not allowed'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='list_append(a, a)=:arg',
|
||||
ExpressionAttributeValues={':val': 1, ':arg': 5})
|
||||
|
||||
# All the previous tests involved top-level attributes to be tested. But
|
||||
# ConditionExpressions also allows reading nested attributes, and we should
|
||||
@@ -1021,6 +894,7 @@ def test_update_condition_nested_attributes(test_table_s):
|
||||
# But the DynamoDB API also allows to refer to attributes using a #reference.
|
||||
# Among other things this allows using attribute names which are usually
|
||||
# reserved keywords in condition expressions.
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_attribute_reference(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1050,6 +924,7 @@ def test_update_condition_nested_attribute_reference(test_table_s):
|
||||
# precedence involved, and should be tested (see the definitions in
|
||||
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.OperatorsAndFunctions.html
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_and(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1074,6 +949,7 @@ def test_update_condition_and(test_table_s):
|
||||
ConditionExpression='a < b AND c < b',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_or(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1103,6 +979,7 @@ def test_update_condition_or(test_table_s):
|
||||
ConditionExpression='b < a OR c < b',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_not(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1125,13 +1002,8 @@ def test_update_condition_not(test_table_s):
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='NOT a < b',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
# NOT NOT NOT NOT also works (and does nothing) :-)
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :val',
|
||||
ConditionExpression='NOT NOT NOT NOT a < b',
|
||||
ExpressionAttributeValues={':val': 3})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 3
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_parentheses(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1147,6 +1019,7 @@ def test_update_condition_parentheses(test_table_s):
|
||||
# There is operator precedence that allows a user to use less parentheses.
|
||||
# We need to implement it correctly:
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_and_before_or(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1159,6 +1032,7 @@ def test_update_condition_and_before_or(test_table_s):
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 1
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_not_before_and(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1171,6 +1045,7 @@ def test_update_condition_not_before_and(test_table_s):
|
||||
ConditionExpression='NOT a < b AND c < b',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_between_before_and(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1185,6 +1060,7 @@ def test_update_condition_between_before_and(test_table_s):
|
||||
|
||||
# An empty ConditionExpression is not allowed - resulting in a validation
|
||||
# error, not a failed condition:
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_update_condition_empty(test_table_s):
|
||||
p = random_string()
|
||||
with pytest.raises(ClientError, match='ValidationException.*empty'):
|
||||
@@ -1200,6 +1076,7 @@ def test_update_condition_empty(test_table_s):
|
||||
# used to test the condition. So we just need one test for each operation,
|
||||
# to verify that this code actually gets called.
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_delete_item_condition(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1214,6 +1091,7 @@ def test_delete_item_condition(test_table_s):
|
||||
ExpressionAttributeValues={':oldval': 1})
|
||||
assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)
|
||||
|
||||
@pytest.mark.xfail(reason="ConditionExpression not yet implemented")
|
||||
def test_put_item_condition(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -1226,162 +1104,3 @@ def test_put_item_condition(test_table_s):
|
||||
test_table_s.put_item(Item={'p': p, 'a': 3},
|
||||
ConditionExpression='a = :oldval',
|
||||
ExpressionAttributeValues={':oldval': 1})
|
||||
|
||||
# DynamoDB frowns upon unused entries in ExpressionAttributeValues and
|
||||
# ExpressionAttributeNames. Check that we do too (in all three operations),
|
||||
# although it's not terribly important that we be compatible with DynamoDB
|
||||
# here...
|
||||
# There's one delicate issue, though. Should we check for unused entries
|
||||
# during parsing, or during evaluation? The stage we check this changes
|
||||
# our behavior when the condition was supposed to fail. So we have two
|
||||
# separate tests here, one for failed condition and one for successful.
|
||||
# Because Alternator does this check at a different stage from DynamoDB,
|
||||
# this test currently fails.
|
||||
@pytest.mark.xfail(reason="unused entries are checked too late")
|
||||
def test_update_condition_unused_entries_failed(test_table_s):
|
||||
p = random_string()
|
||||
# unused val3:
|
||||
with pytest.raises(ClientError, match='ValidationException.*val3'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET #name1 = :val1',
|
||||
ConditionExpression='#name2 = :val2',
|
||||
ExpressionAttributeValues={':val1': 1, ':val2': 2, ':val3': 3},
|
||||
ExpressionAttributeNames={'#name1': 'a', '#name2': 'b'})
|
||||
with pytest.raises(ClientError, match='ValidationException.*val3'):
|
||||
test_table_s.delete_item(Key={'p': p},
|
||||
ConditionExpression='#name1 = :val1',
|
||||
ExpressionAttributeValues={':val1': 1, ':val3': 3},
|
||||
ExpressionAttributeNames={'#name1': 'a'})
|
||||
with pytest.raises(ClientError, match='ValidationException.*val3'):
|
||||
test_table_s.put_item(Item={'p': p, 'a': 3},
|
||||
ConditionExpression='#name1 = :val1',
|
||||
ExpressionAttributeValues={':val1': 1, ':val3': 3},
|
||||
ExpressionAttributeNames={'#name1': 'a'})
|
||||
# unused name3:
|
||||
with pytest.raises(ClientError, match='ValidationException.*name3'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET #name1 = :val1',
|
||||
ConditionExpression='#name2 = :val2',
|
||||
ExpressionAttributeValues={':val1': 1, ':val2': 2},
|
||||
ExpressionAttributeNames={'#name1': 'a', '#name2': 'b', '#name3': 'c'})
|
||||
with pytest.raises(ClientError, match='ValidationException.*name3'):
|
||||
test_table_s.delete_item(Key={'p': p},
|
||||
ConditionExpression='#name1 = :val1',
|
||||
ExpressionAttributeValues={':val1': 1},
|
||||
ExpressionAttributeNames={'#name1': 'a', '#name3': 'c'})
|
||||
with pytest.raises(ClientError, match='ValidationException.*name3'):
|
||||
test_table_s.put_item(Item={'p': p, 'a': 3},
|
||||
ConditionExpression='#name1 = :val1',
|
||||
ExpressionAttributeValues={':val1': 1},
|
||||
ExpressionAttributeNames={'#name1': 'a', '#name3': 'c'})
|
||||
def test_update_condition_unused_entries_succeeded(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'b': {'Value': 2, 'Action': 'PUT'}})
|
||||
# unused val3:
|
||||
with pytest.raises(ClientError, match='ValidationException.*val3'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET #name1 = :val1',
|
||||
ConditionExpression='#name2 = :val2',
|
||||
ExpressionAttributeValues={':val1': 1, ':val2': 2, ':val3': 3},
|
||||
ExpressionAttributeNames={'#name1': 'a', '#name2': 'b'})
|
||||
with pytest.raises(ClientError, match='ValidationException.*val3'):
|
||||
test_table_s.delete_item(Key={'p': p},
|
||||
ConditionExpression='#name2 = :val2',
|
||||
ExpressionAttributeValues={':val2': 2, ':val3': 3},
|
||||
ExpressionAttributeNames={'#name2': 'b'})
|
||||
with pytest.raises(ClientError, match='ValidationException.*val3'):
|
||||
test_table_s.put_item(Item={'p': p, 'a': 3},
|
||||
ConditionExpression='#name2 = :val2',
|
||||
ExpressionAttributeValues={':val2': 2, ':val3': 3},
|
||||
ExpressionAttributeNames={'#name2': 'b'})
|
||||
# unused name3:
|
||||
with pytest.raises(ClientError, match='ValidationException.*name3'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET #name1 = :val1',
|
||||
ConditionExpression='#name2 = :val2',
|
||||
ExpressionAttributeValues={':val1': 1, ':val2': 2},
|
||||
ExpressionAttributeNames={'#name1': 'a', '#name2': 'b', '#name3': 'c'})
|
||||
with pytest.raises(ClientError, match='ValidationException.*name3'):
|
||||
test_table_s.delete_item(Key={'p': p},
|
||||
ConditionExpression='#name2 = :val2',
|
||||
ExpressionAttributeValues={':val2': 2},
|
||||
ExpressionAttributeNames={'#name2': 'b', '#name3': 'c'})
|
||||
with pytest.raises(ClientError, match='ValidationException.*name3'):
|
||||
test_table_s.put_item(Item={'p': p, 'a': 3},
|
||||
ConditionExpression='#name2 = :val2',
|
||||
ExpressionAttributeValues={':val2': 2},
|
||||
ExpressionAttributeNames={'#name2': 'b', '#name3': 'c'})
|
||||
|
||||
# Test a bunch of cases with permissive write isolation levels,
|
||||
# i.e. LWT_ALWAYS, LWT_RMW_ONLY and UNSAFE_RMW.
|
||||
# These test cases make sense only for alternator, so they're skipped
|
||||
# when run on AWS
|
||||
def test_condition_expression_with_permissive_write_isolation(scylla_only, dynamodb, test_table_s):
|
||||
def do_test_with_permissive_isolation_levels(test_case, table, *args):
|
||||
try:
|
||||
for isolation in ['a', 'o', 'u']:
|
||||
set_write_isolation(table, isolation)
|
||||
test_case(table, *args)
|
||||
finally:
|
||||
clear_write_isolation(table)
|
||||
for test_case in [test_update_condition_eq_success, test_update_condition_attribute_exists,
|
||||
test_delete_item_condition, test_put_item_condition, test_update_condition_attribute_reference]:
|
||||
do_test_with_permissive_isolation_levels(test_case, test_table_s)
|
||||
|
||||
# Test that the forbid_rmw isolation level prevents read-modify-write requests
|
||||
# from working. These test cases make sense only for alternator, so they're skipped
|
||||
# when run on AWS
|
||||
def test_condition_expression_with_forbidden_rmw(scylla_only, dynamodb, test_table_s):
|
||||
def do_test_with_forbidden_rmw(test_case, table, *args):
|
||||
try:
|
||||
set_write_isolation(table, 'f')
|
||||
test_case(table, *args)
|
||||
assert False, "Expected an exception when running {}".format(test_case.__name__)
|
||||
except ClientError:
|
||||
pass
|
||||
finally:
|
||||
clear_write_isolation(table)
|
||||
for test_case in [test_update_condition_eq_success, test_update_condition_attribute_exists,
|
||||
test_put_item_condition, test_update_condition_attribute_reference]:
|
||||
do_test_with_forbidden_rmw(test_case, test_table_s)
|
||||
# Ensure that regular writes (without rmw) work just fine
|
||||
s = random_string()
|
||||
test_table_s.put_item(Item={'p': s, 'regular': 'write'})
|
||||
assert test_table_s.get_item(Key={'p': s}, ConsistentRead=True)['Item'] == {'p': s, 'regular': 'write'}
|
||||
test_table_s.update_item(Key={'p': s}, AttributeUpdates={'write': {'Value': 'regular', 'Action': 'PUT'}})
|
||||
assert test_table_s.get_item(Key={'p': s}, ConsistentRead=True)['Item'] == {'p': s, 'regular': 'write', 'write': 'regular'}
|
||||
|
||||
# Reproducer for issue #6573: binary strings should be ordered as unsigned
|
||||
# bytes, i.e., byte 128 comes after 127, not before as with signed bytes.
|
||||
# Test the five ordering operators: <, <=, >, >=, between
|
||||
def test_condition_expression_unsigned_bytes(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'b': bytearray([127])})
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b < :oldval',
|
||||
ExpressionAttributeValues={':newval': 1, ':oldval': bytearray([128])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 1
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b <= :oldval',
|
||||
ExpressionAttributeValues={':newval': 2, ':oldval': bytearray([128])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 2
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b between :oldval1 and :oldval2',
|
||||
ExpressionAttributeValues={':newval': 3, ':oldval1': bytearray([126]), ':oldval2': bytearray([128])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 3
|
||||
|
||||
test_table_s.put_item(Item={'p': p, 'b': bytearray([128])})
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b > :oldval',
|
||||
ExpressionAttributeValues={':newval': 4, ':oldval': bytearray([127])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET z = :newval',
|
||||
ConditionExpression='b >= :oldval',
|
||||
ExpressionAttributeValues={':newval': 5, ':oldval': bytearray([127])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 5
|
||||
@@ -141,6 +141,7 @@ def test_describe_table_stream_specification(test_table):
|
||||
# includes which zone it is on, which account, and of course the table's
|
||||
# name. The ARN format is described in
|
||||
# https://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html#genref-arns
|
||||
@pytest.mark.xfail(reason="DescribeTable does not return ARN")
|
||||
def test_describe_table_arn(test_table):
|
||||
got = test_table.meta.client.describe_table(TableName=test_table.name)['Table']
|
||||
assert 'TableArn' in got and got['TableArn'].startswith('arn:')
|
||||
@@ -1077,42 +1077,3 @@ def test_put_item_expected(test_table_s):
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': 2}
|
||||
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
|
||||
test_table_s.put_item(Item={'p': p, 'a': 3}, Expected={'a': {'Value': 1}})
|
||||
|
||||
# Reproducer for issue #6573: binary strings should be ordered as unsigned
|
||||
# bytes, i.e., byte 128 comes after 127, not before as with signed bytes.
|
||||
# Test the five ordering operators: LT, LE, GT, GE, BETWEEN
|
||||
def test_update_expected_unsigned_bytes(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'b': bytearray([127])})
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 1, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'LT',
|
||||
'AttributeValueList': [bytearray([128])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 1
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 2, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'LE',
|
||||
'AttributeValueList': [bytearray([128])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 2
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 3, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'BETWEEN',
|
||||
'AttributeValueList': [bytearray([126]), bytearray([128])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 3
|
||||
|
||||
test_table_s.put_item(Item={'p': p, 'b': bytearray([128])})
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 4, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'GT',
|
||||
'AttributeValueList': [bytearray([127])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 4
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
AttributeUpdates={'z': {'Value': 5, 'Action': 'PUT'}},
|
||||
Expected={'b': {'ComparisonOperator': 'GE',
|
||||
'AttributeValueList': [bytearray([127])]}}
|
||||
)
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 5
|
||||
@@ -71,7 +71,7 @@ def test_gsi_identical(dynamodb):
|
||||
# results (in different order).
|
||||
assert multiset(items) == multiset(full_scan(table))
|
||||
assert_index_scan(table, 'hello', items)
|
||||
# We can't scan a non-existent index
|
||||
# We can't scan a non-existant index
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
full_scan(table, IndexName='wrong')
|
||||
table.delete()
|
||||
@@ -150,6 +150,7 @@ def test_gsi_missing_table(dynamodb):
|
||||
dynamodb.meta.client.scan(TableName='nonexistent_table', IndexName='any_name')
|
||||
|
||||
# Verify that strongly-consistent reads on GSI are *not* allowed.
|
||||
@pytest.mark.xfail(reason="GSI strong consistency not checked")
|
||||
def test_gsi_strong_consistency(test_table_gsi_1):
|
||||
with pytest.raises(ClientError, match='ValidationException.*Consistent'):
|
||||
full_query(test_table_gsi_1, KeyConditions={'c': {'AttributeValueList': ['hi'], 'ComparisonOperator': 'EQ'}}, IndexName='hello', ConsistentRead=True)
|
||||
@@ -22,7 +22,7 @@ import requests
|
||||
# Test that a health check can be performed with a GET packet
|
||||
def test_health_works(dynamodb):
|
||||
url = dynamodb.meta.client._endpoint.host
|
||||
response = requests.get(url, verify=False)
|
||||
response = requests.get(url)
|
||||
assert response.ok
|
||||
assert response.content.decode('utf-8').strip() == 'healthy: {}'.format(url.replace('https://', '').replace('http://', ''))
|
||||
|
||||
402
alternator-test/test_item.py
Normal file
402
alternator-test/test_item.py
Normal file
@@ -0,0 +1,402 @@
|
||||
# Copyright 2019 ScyllaDB
|
||||
#
|
||||
# This file is part of Scylla.
|
||||
#
|
||||
# Scylla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Scylla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# Tests for the CRUD item operations: PutItem, GetItem, UpdateItem, DeleteItem
|
||||
|
||||
import pytest
|
||||
from botocore.exceptions import ClientError
|
||||
from decimal import Decimal
|
||||
from util import random_string, random_bytes
|
||||
|
||||
# Basic test for creating a new item with a random name, and reading it back
|
||||
# with strong consistency.
|
||||
# Only the string type is used for keys and attributes. None of the various
|
||||
# optional PutItem features (Expected, ReturnValues, ReturnConsumedCapacity,
|
||||
# ReturnItemCollectionMetrics, ConditionalOperator, ConditionExpression,
|
||||
# ExpressionAttributeNames, ExpressionAttributeValues) are used, and
|
||||
# for GetItem strong consistency is requested as well as all attributes,
|
||||
# but no other optional features (AttributesToGet, ReturnConsumedCapacity,
|
||||
# ProjectionExpression, ExpressionAttributeNames)
|
||||
def test_basic_string_put_and_get(test_table):
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
val = random_string()
|
||||
val2 = random_string()
|
||||
test_table.put_item(Item={'p': p, 'c': c, 'attribute': val, 'another': val2})
|
||||
item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
|
||||
assert item['p'] == p
|
||||
assert item['c'] == c
|
||||
assert item['attribute'] == val
|
||||
assert item['another'] == val2
|
||||
|
||||
# Similar to test_basic_string_put_and_get, just uses UpdateItem instead of
|
||||
# PutItem. Because the item does not yet exist, it should work the same.
|
||||
def test_basic_string_update_and_get(test_table):
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
val = random_string()
|
||||
val2 = random_string()
|
||||
test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'attribute': {'Value': val, 'Action': 'PUT'}, 'another': {'Value': val2, 'Action': 'PUT'}})
|
||||
item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
|
||||
assert item['p'] == p
|
||||
assert item['c'] == c
|
||||
assert item['attribute'] == val
|
||||
assert item['another'] == val2
|
||||
|
||||
# Test put_item and get_item of various types for the *attributes*,
|
||||
# including both scalars as well as nested documents, lists and sets.
|
||||
# The full list of types tested here:
|
||||
# number, boolean, bytes, null, list, map, string set, number set,
|
||||
# binary set.
|
||||
# The keys are still strings.
|
||||
# Note that only top-level attributes are written and read in this test -
|
||||
# this test does not attempt to modify *nested* attributes.
|
||||
# See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/dynamodb.html
|
||||
# on how to pass these various types to Boto3's put_item().
|
||||
def test_put_and_get_attribute_types(test_table):
|
||||
key = {'p': random_string(), 'c': random_string()}
|
||||
test_items = [
|
||||
Decimal("12.345"),
|
||||
42,
|
||||
True,
|
||||
False,
|
||||
b'xyz',
|
||||
None,
|
||||
['hello', 'world', 42],
|
||||
{'hello': 'world', 'life': 42},
|
||||
{'hello': {'test': 'hi', 'hello': True, 'list': [1, 2, 'hi']}},
|
||||
set(['hello', 'world', 'hi']),
|
||||
set([1, 42, Decimal("3.14")]),
|
||||
set([b'xyz', b'hi']),
|
||||
]
|
||||
item = { str(i) : test_items[i] for i in range(len(test_items)) }
|
||||
item.update(key)
|
||||
test_table.put_item(Item=item)
|
||||
got_item = test_table.get_item(Key=key, ConsistentRead=True)['Item']
|
||||
assert item == got_item
|
||||
|
||||
# The test_empty_* tests below verify support for empty items, with no
|
||||
# attributes except the key. This is a difficult case for Scylla, because
|
||||
# for an empty row to exist, Scylla needs to add a "CQL row marker".
|
||||
# There are several ways to create empty items - via PutItem, UpdateItem
|
||||
# and deleting attributes from non-empty items, and we need to check them
|
||||
# all, in several test_empty_* tests:
|
||||
def test_empty_put(test_table):
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
test_table.put_item(Item={'p': p, 'c': c})
|
||||
item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
|
||||
assert item == {'p': p, 'c': c}
|
||||
def test_empty_put_delete(test_table):
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
test_table.put_item(Item={'p': p, 'c': c, 'hello': 'world'})
|
||||
test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={'hello': {'Action': 'DELETE'}})
|
||||
item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
|
||||
assert item == {'p': p, 'c': c}
|
||||
def test_empty_update(test_table):
|
||||
p = random_string()
|
||||
c = random_string()
|
||||
test_table.update_item(Key={'p': p, 'c': c}, AttributeUpdates={})
|
||||
item = test_table.get_item(Key={'p': p, 'c': c}, ConsistentRead=True)['Item']
|
||||
assert item == {'p': p, 'c': c}
|
||||
def test_empty_update_delete(test_table):
    # Create the item through UpdateItem (PUT of one attribute), then remove
    # that attribute again; the item must survive as an empty item.
    key = {'p': random_string(), 'c': random_string()}
    for change in ({'Value': 'world', 'Action': 'PUT'}, {'Action': 'DELETE'}):
        test_table.update_item(Key=key, AttributeUpdates={'hello': change})
    stored = test_table.get_item(Key=key, ConsistentRead=True)['Item']
    assert stored == key
|
||||
|
||||
# Test error handling of UpdateItem passed a bad "Action" field.
|
||||
def test_update_bad_action(test_table):
    # An "Action" other than the ones UpdateItem knows about must be rejected
    # with a ValidationException.
    key = {'p': random_string(), 'c': random_string()}
    bogus_update = {'attribute': {'Value': random_string(), 'Action': 'NONEXISTENT'}}
    with pytest.raises(ClientError, match='ValidationException'):
        test_table.update_item(Key=key, AttributeUpdates=bogus_update)
|
||||
|
||||
# A more elaborate UpdateItem test, updating different attributes at different
|
||||
# times. Includes PUT and DELETE operations.
|
||||
def test_basic_string_more_update(test_table):
    # Applies a sequence of single-attribute updates to one item: a3 is
    # written and later deleted, a1 is written and then overwritten, and a2
    # is written once. The final read must reflect exactly that history.
    p = random_string()
    c = random_string()
    val1 = random_string()
    val2 = random_string()
    val3 = random_string()
    key = {'p': p, 'c': c}
    # The order of the updates matters: later entries override earlier ones.
    updates = [
        {'a3': {'Value': val1, 'Action': 'PUT'}},
        {'a1': {'Value': val1, 'Action': 'PUT'}},
        {'a2': {'Value': val2, 'Action': 'PUT'}},
        {'a1': {'Value': val3, 'Action': 'PUT'}},
        {'a3': {'Action': 'DELETE'}},
    ]
    for update in updates:
        test_table.update_item(Key=key, AttributeUpdates=update)
    item = test_table.get_item(Key=key, ConsistentRead=True)['Item']
    assert item['p'] == p
    assert item['c'] == c
    assert item['a1'] == val3
    assert item['a2'] == val2
    assert 'a3' not in item
|
||||
|
||||
# Test that item operations on a non-existent table name fail with correct
|
||||
# error code.
|
||||
def test_item_operations_nonexistent_table(dynamodb):
    # A PutItem aimed at a table name that does not exist must fail with
    # ResourceNotFoundException. The low-level client is used, so the item is
    # given in DynamoDB's typed wire format.
    request = {'TableName': 'non_existent_table', 'Item': {'a': {'S': 'b'}}}
    with pytest.raises(ClientError, match='ResourceNotFoundException'):
        dynamodb.meta.client.put_item(**request)
|
||||
|
||||
# Fetching a non-existent item. According to the DynamoDB doc, "If there is no
|
||||
# matching item, GetItem does not return any data and there will be no Item
|
||||
# element in the response."
|
||||
def test_get_item_missing_item(test_table):
    # Reading a key that was never written must yield a response without any
    # "Item" member.
    key = {'p': random_string(), 'c': random_string()}
    response = test_table.get_item(Key=key, ConsistentRead=True)
    assert "Item" not in response
|
||||
|
||||
# Test that if we have a table with string hash and sort keys, we can't read
|
||||
# or write items with other key types to it.
|
||||
def test_put_item_wrong_key_type(test_table):
    # test_table is used here with string hash ('p') and sort ('c') keys, so
    # PutItem must refuse items whose key attributes have another type or
    # are missing.
    b = random_bytes()
    s = random_string()
    n = Decimal("3.14")
    # Correct key types: the write succeeds and can be read back.
    good_item = {'p': s, 'c': s}
    test_table.put_item(Item=good_item)
    assert test_table.get_item(Key=good_item, ConsistentRead=True)['Item'] == good_item
    rejected_items = [
        {'p': b, 'c': s},   # hash key has the wrong type (bytes)
        {'p': n, 'c': s},   # hash key has the wrong type (number)
        {'p': s, 'c': b},   # sort key has the wrong type (bytes)
        {'p': s, 'c': n},   # sort key has the wrong type (number)
        {'c': s},           # hash key is missing
        {'p': s},           # sort key is missing
    ]
    for bad_item in rejected_items:
        with pytest.raises(ClientError, match='ValidationException'):
            test_table.put_item(Item=bad_item)
|
||||
def test_update_item_wrong_key_type(test_table, test_table_s):
    # UpdateItem counterpart of the wrong-key-type checks: test_table is used
    # with string hash ('p') and sort ('c') keys, test_table_s with only a
    # hash key ('p'). Every malformed Key below must be rejected with a
    # ValidationException.
    b = random_bytes()
    s = random_string()
    n = Decimal("3.14")
    # Correct key types: the (empty) update succeeds and creates the item.
    good_key = {'p': s, 'c': s}
    test_table.update_item(Key=good_key, AttributeUpdates={})
    assert test_table.get_item(Key=good_key, ConsistentRead=True)['Item'] == good_key
    rejected = [
        (test_table, {'p': b, 'c': s}),                  # hash key: bytes
        (test_table, {'p': n, 'c': s}),                  # hash key: number
        (test_table, {'p': s, 'c': b}),                  # sort key: bytes
        (test_table, {'p': s, 'c': n}),                  # sort key: number
        (test_table, {'c': s}),                          # hash key missing
        (test_table, {'p': s}),                          # sort key missing
        # Spurious key columns. These two cases previously called get_item,
        # so UpdateItem itself was never exercised for them.
        (test_table, {'p': s, 'c': s, 'spurious': s}),
        (test_table_s, {'p': s, 'c': s}),
    ]
    for table, bad_key in rejected:
        with pytest.raises(ClientError, match='ValidationException'):
            table.update_item(Key=bad_key, AttributeUpdates={})
|
||||
def test_get_item_wrong_key_type(test_table, test_table_s):
    # GetItem must reject a Key whose attributes have the wrong type, are
    # missing, or include columns that are not part of the table's key.
    b = random_bytes()
    s = random_string()
    n = Decimal("3.14")
    # Correct key types: the request is accepted but finds nothing.
    response = test_table.get_item(Key={'p': s, 'c': s}, ConsistentRead=True)
    assert "Item" not in response
    rejected = [
        (test_table, {'p': b, 'c': s}),                  # hash key: bytes
        (test_table, {'p': n, 'c': s}),                  # hash key: number
        (test_table, {'p': s, 'c': b}),                  # sort key: bytes
        (test_table, {'p': s, 'c': n}),                  # sort key: number
        (test_table, {'c': s}),                          # hash key missing
        (test_table, {'p': s}),                          # sort key missing
        (test_table, {'p': s, 'c': s, 'spurious': s}),   # spurious column
        (test_table_s, {'p': s, 'c': s}),                # spurious sort key
    ]
    for table, bad_key in rejected:
        with pytest.raises(ClientError, match='ValidationException'):
            table.get_item(Key=bad_key)
|
||||
def test_delete_item_wrong_key_type(test_table, test_table_s):
    # DeleteItem must reject a Key whose attributes have the wrong type, are
    # missing, or include columns that are not part of the table's key.
    b = random_bytes()
    s = random_string()
    n = Decimal("3.14")
    # Correct key types: the delete request is accepted.
    test_table.delete_item(Key={'p': s, 'c': s})
    rejected = [
        (test_table, {'p': b, 'c': s}),                  # hash key: bytes
        (test_table, {'p': n, 'c': s}),                  # hash key: number
        (test_table, {'p': s, 'c': b}),                  # sort key: bytes
        (test_table, {'p': s, 'c': n}),                  # sort key: number
        (test_table, {'c': s}),                          # hash key missing
        (test_table, {'p': s}),                          # sort key missing
        (test_table, {'p': s, 'c': s, 'spurious': s}),   # spurious column
        (test_table_s, {'p': s, 'c': s}),                # spurious sort key
    ]
    for table, bad_key in rejected:
        with pytest.raises(ClientError, match='ValidationException'):
            table.delete_item(Key=bad_key)
|
||||
|
||||
# Most of the tests here arbitrarily used a table with both hash and sort keys
|
||||
# (both strings). Let's check that a table with *only* a hash key works ok
|
||||
# too, for PutItem, GetItem, and UpdateItem.
|
||||
def test_only_hash_key(test_table_s):
    # PutItem, GetItem and UpdateItem on a table whose key is a hash key only.
    s = random_string()
    key = {'p': s}
    test_table_s.put_item(Item={'p': s, 'hello': 'world'})
    after_put = test_table_s.get_item(Key=key, ConsistentRead=True)['Item']
    assert after_put == {'p': s, 'hello': 'world'}
    test_table_s.update_item(Key=key, AttributeUpdates={'hi': {'Value': 'there', 'Action': 'PUT'}})
    after_update = test_table_s.get_item(Key=key, ConsistentRead=True)['Item']
    assert after_update == {'p': s, 'hello': 'world', 'hi': 'there'}
|
||||
|
||||
# Tests for item operations in tables with non-string hash or sort keys.
|
||||
# These tests focus only on the type of the key - everything else is as
|
||||
# simple as we can (string attributes, no special options for GetItem
|
||||
# and PutItem). These tests also focus on individual items only, and
|
||||
# not about the sort order of sort keys - this should be verified in
|
||||
# test_query.py, for example.
|
||||
def test_bytes_hash_key(test_table_b):
    # Bytes values are passed using base64 encoding, which has weird cases
    # depending on len%3 and len%4. So let's try various lengths.
    # The loop variable is deliberately not called "len", which would shadow
    # the builtin for the rest of the function.
    for length in range(10, 18):
        p = random_bytes(length)
        val = random_string()
        test_table_b.put_item(Item={'p': p, 'attribute': val})
        assert test_table_b.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'attribute': val}
|
||||
def test_bytes_sort_key(test_table_sb):
    # Write and read back one item whose sort key 'c' is a bytes value.
    key = {'p': random_string(), 'c': random_bytes()}
    expected = dict(key, attribute=random_string())
    test_table_sb.put_item(Item=expected)
    stored = test_table_sb.get_item(Key=key, ConsistentRead=True)['Item']
    assert stored == expected
|
||||
|
||||
# Tests for using a large binary blob as hash key, sort key, or attribute.
|
||||
# DynamoDB strictly limits the size of the binary hash key to 2048 bytes,
|
||||
# and binary sort key to 1024 bytes, and refuses anything larger. The total
|
||||
# size of an item is limited to 400KB, which also limits the size of the
|
||||
# largest attributes. For more details on these limits, see
|
||||
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
|
||||
# Alternator currently does *not* have these limitations, and can accept much
|
||||
# larger keys and attributes, but what we do in the following tests is to verify
|
||||
# that items up to DynamoDB's maximum sizes also work well in Alternator.
|
||||
def test_large_blob_hash_key(test_table_b):
    # A 2048-byte binary hash key must be storable and readable.
    key = {'p': random_bytes(2048)}
    test_table_b.put_item(Item=key)
    response = test_table_b.get_item(Key=key, ConsistentRead=True)
    assert response['Item'] == key
|
||||
def test_large_blob_sort_key(test_table_sb):
    # A 1024-byte binary sort key must be storable and readable.
    key = {'p': random_string(), 'c': random_bytes(1024)}
    test_table_sb.put_item(Item=key)
    response = test_table_sb.get_item(Key=key, ConsistentRead=True)
    assert response['Item'] == key
|
||||
def test_large_blob_attribute(test_table):
    # Store and read back an item carrying one very large binary attribute.
    key = {'p': random_string(), 'c': random_string()}
    blob = random_bytes(409500)  # a bit less than 400KB
    expected = dict(key, attribute=blob)
    test_table.put_item(Item=expected)
    stored = test_table.get_item(Key=key, ConsistentRead=True)['Item']
    assert stored == expected
|
||||
|
||||
# Checks that it is not allowed to use in a single UpdateItem request both
|
||||
# old-style AttributeUpdates and new-style UpdateExpression.
|
||||
def test_update_item_two_update_methods(test_table_s):
    # One UpdateItem request that supplies both AttributeUpdates and
    # UpdateExpression must be refused with a ValidationException.
    request = {
        'Key': {'p': random_string()},
        'AttributeUpdates': {'a': {'Value': 3, 'Action': 'PUT'}},
        'UpdateExpression': 'SET b = :val1',
        'ExpressionAttributeValues': {':val1': 4},
    }
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.update_item(**request)
|
||||
|
||||
# Verify that having neither AttributeUpdates nor UpdateExpression is
|
||||
# allowed, and results in creation of an empty item.
|
||||
def test_update_item_no_update_method(test_table_s):
    # UpdateItem given only a Key (no AttributeUpdates, no UpdateExpression)
    # is accepted and creates an item holding just the key.
    key = {'p': random_string()}
    before = test_table_s.get_item(Key=key, ConsistentRead=True)
    assert "Item" not in before
    test_table_s.update_item(Key=key)
    after = test_table_s.get_item(Key=key, ConsistentRead=True)
    assert after['Item'] == key
|
||||
|
||||
# Test GetItem with the AttributesToGet parameter. Result should include the
|
||||
# selected attributes only - if one wants the key attributes as well, one
|
||||
# needs to select them explicitly. When no key attributes are selected,
|
||||
# some items may have *none* of the selected attributes. Those items are
|
||||
# returned too, as empty items - they are not outright missing.
|
||||
def test_getitem_attributes_to_get(dynamodb, test_table):
    # For each AttributesToGet selection, GetItem must return exactly those
    # selected attributes that the stored item actually has.
    p = random_string()
    c = random_string()
    item = {'p': p, 'c': c, 'a': 'hello', 'b': 'hi'}
    test_table.put_item(Item=item)
    selections = [
        ['a'],            # only non-key attribute
        ['c', 'a'],       # a key attribute (sort key) and non-key
        ['p', 'c'],       # entire key
        ['nonexistent'],  # Our item doesn't have this
    ]
    for wanted in selections:
        response = test_table.get_item(Key={'p': p, 'c': c}, AttributesToGet=wanted, ConsistentRead=True)
        got_item = response['Item']
        expected_item = {name: item[name] for name in wanted if name in item}
        assert expected_item == got_item
|
||||
|
||||
# Basic test for DeleteItem, with hash key only
|
||||
def test_delete_item_hash(test_table_s):
    # Put an item, confirm it is visible, delete it, confirm it is gone.
    key = {'p': random_string()}
    test_table_s.put_item(Item=key)
    before = test_table_s.get_item(Key=key, ConsistentRead=True)
    assert 'Item' in before
    test_table_s.delete_item(Key=key)
    after = test_table_s.get_item(Key=key, ConsistentRead=True)
    assert 'Item' not in after
|
||||
|
||||
# Basic test for DeleteItem, with hash and sort key
|
||||
def test_delete_item_sort(test_table):
    # Same put / check / delete / check cycle as the hash-only variant, on a
    # table whose key has both 'p' and 'c'.
    key = {'p': random_string(), 'c': random_string()}
    test_table.put_item(Item=key)
    before = test_table.get_item(Key=key, ConsistentRead=True)
    assert 'Item' in before
    test_table.delete_item(Key=key)
    after = test_table.get_item(Key=key, ConsistentRead=True)
    assert 'Item' not in after
|
||||
|
||||
# Test that PutItem completely replaces an existing item. It shouldn't merge
|
||||
# it with a previously existing value, as UpdateItem does!
|
||||
# We test for a table with just hash key, and for a table with both hash and
|
||||
# sort keys.
|
||||
def test_put_item_replace(test_table_s, test_table):
    # A second PutItem to the same key must replace the stored item: the
    # attribute written first ('a') must not survive the second write ('b').
    p = random_string()
    hash_only_key = {'p': p}
    for attrs in ({'a': 'hi'}, {'b': 'hello'}):
        written = dict(hash_only_key, **attrs)
        test_table_s.put_item(Item=written)
        assert test_table_s.get_item(Key=hash_only_key, ConsistentRead=True)['Item'] == written
    # Same check on the table with both hash and sort keys.
    c = random_string()
    full_key = {'p': p, 'c': c}
    for attrs in ({'a': 'hi'}, {'b': 'hello'}):
        written = dict(full_key, **attrs)
        test_table.put_item(Item=written)
        assert test_table.get_item(Key=full_key, ConsistentRead=True)['Item'] == written
|
||||
@@ -26,23 +26,26 @@ import time
|
||||
from botocore.exceptions import ClientError, ParamValidationError
|
||||
from util import create_test_table, random_string, full_scan, full_query, multiset, list_tables
|
||||
|
||||
# LSIs support strongly-consistent reads, so the following functions do not
|
||||
# need to retry like we did in test_gsi.py for GSIs:
|
||||
# Currently, Alternator's LSIs only support eventually consistent reads, so tests
|
||||
# that involve writing to a table and then expect to read something from it cannot
|
||||
# be guaranteed to succeed without retrying the read. The following utility
|
||||
# functions make it easy to write such tests.
|
||||
def assert_index_query(table, index_name, expected_items, **kwargs):
|
||||
assert multiset(expected_items) == multiset(full_query(table, IndexName=index_name, ConsistentRead=True, **kwargs))
|
||||
def assert_index_scan(table, index_name, expected_items, **kwargs):
|
||||
assert multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, ConsistentRead=True, **kwargs))
|
||||
|
||||
# A version doing retries instead of ConsistentRead, to be used just for the
|
||||
# one test below which has both GSI and LSI:
|
||||
def retrying_assert_index_query(table, index_name, expected_items, **kwargs):
|
||||
for i in range(3):
|
||||
if multiset(expected_items) == multiset(full_query(table, IndexName=index_name, **kwargs)):
|
||||
return
|
||||
print('retrying_assert_index_query retrying')
|
||||
print('assert_index_query retrying')
|
||||
time.sleep(1)
|
||||
assert multiset(expected_items) == multiset(full_query(table, IndexName=index_name, **kwargs))
|
||||
|
||||
def assert_index_scan(table, index_name, expected_items, **kwargs):
|
||||
for i in range(3):
|
||||
if multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, **kwargs)):
|
||||
return
|
||||
print('assert_index_scan retrying')
|
||||
time.sleep(1)
|
||||
assert multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, **kwargs))
|
||||
|
||||
# Although quite silly, it is actually allowed to create an index which is
|
||||
# identical to the base table.
|
||||
def test_lsi_identical(dynamodb):
|
||||
@@ -63,7 +66,7 @@ def test_lsi_identical(dynamodb):
|
||||
# results (in different order).
|
||||
assert multiset(items) == multiset(full_scan(table))
|
||||
assert_index_scan(table, 'hello', items)
|
||||
# We can't scan a non-existent index
|
||||
# We can't scan a non-existant index
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
full_scan(table, IndexName='wrong')
|
||||
table.delete()
|
||||
@@ -299,11 +302,13 @@ def test_lsi_consistent_read(test_table_lsi_1):
|
||||
expected_items = [i for i in items if i['p'] == p1 and i['b'] == b1]
|
||||
assert_index_query(test_table_lsi_1, 'hello', expected_items,
|
||||
KeyConditions={'p': {'AttributeValueList': [p1], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b1], 'ComparisonOperator': 'EQ'}})
|
||||
'b': {'AttributeValueList': [b1], 'ComparisonOperator': 'EQ'}},
|
||||
ConsistentRead=True)
|
||||
expected_items = [i for i in items if i['p'] == p2 and i['b'] == b2]
|
||||
assert_index_query(test_table_lsi_1, 'hello', expected_items,
|
||||
KeyConditions={'p': {'AttributeValueList': [p2], 'ComparisonOperator': 'EQ'},
|
||||
'b': {'AttributeValueList': [b2], 'ComparisonOperator': 'EQ'}})
|
||||
'b': {'AttributeValueList': [b2], 'ComparisonOperator': 'EQ'}},
|
||||
ConsistentRead=True)
|
||||
|
||||
# A table with both gsi and lsi present
|
||||
@pytest.fixture(scope="session")
|
||||
@@ -355,6 +360,6 @@ def test_lsi_and_gsi(test_table_lsi_gsi):
|
||||
|
||||
for index in ['hello_g1', 'hello_l1']:
|
||||
expected_items = [i for i in items if i['p'] == p1 and i['x1'] == x1]
|
||||
retrying_assert_index_query(test_table_lsi_gsi, index, expected_items,
|
||||
assert_index_query(test_table_lsi_gsi, index, expected_items,
|
||||
KeyConditions={'p': {'AttributeValueList': [p1], 'ComparisonOperator': 'EQ'},
|
||||
'x1': {'AttributeValueList': [x1], 'ComparisonOperator': 'EQ'}})
|
||||
@@ -134,10 +134,10 @@ def test_projection_expression_path(test_table_s):
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0]')['Item'] == {'a': {'b': [2]}}
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[2]')['Item'] == {'a': {'b': [{'x': 'hi', 'y': 'yo'}]}}
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[2].y')['Item'] == {'a': {'b': [{'y': 'yo'}]}}
|
||||
# Trying to read any sort of non-existent attribute returns an empty item.
|
||||
# Trying to read any sort of non-existant attribute returns an empty item.
|
||||
# This includes a non-existing top-level attribute, an attempt to read
|
||||
# beyond the end of an array or a non-existent member of a dictionary, as
|
||||
# well as paths which begin with a non-existent prefix.
|
||||
# beyond the end of an array or a non-existant member of a dictionary, as
|
||||
# well as paths which begin with a non-existant prefix.
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='x')['Item'] == {}
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[3]')['Item'] == {}
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x')['Item'] == {}
|
||||
@@ -100,13 +100,16 @@ def test_query_basic_restrictions(dynamodb, filled_test_table):
|
||||
print(got_items)
|
||||
assert multiset([item for item in items if item['p'] == 'long' and item['c'].startswith('11')]) == multiset(got_items)
|
||||
|
||||
def test_query_nonexistent_table(dynamodb):
|
||||
client = dynamodb.meta.client
|
||||
with pytest.raises(ClientError, match="ResourceNotFoundException"):
|
||||
client.query(TableName="i_do_not_exist", KeyConditions={
|
||||
'p' : {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'},
|
||||
'c' : {'AttributeValueList': ['11'], 'ComparisonOperator': 'BEGINS_WITH'}
|
||||
})
|
||||
# Test that KeyConditionExpression parameter is supported
|
||||
@pytest.mark.xfail(reason="KeyConditionExpression not supported yet")
|
||||
def test_query_key_condition_expression(dynamodb, filled_test_table):
|
||||
test_table, items = filled_test_table
|
||||
paginator = dynamodb.meta.client.get_paginator('query')
|
||||
got_items = []
|
||||
for page in paginator.paginate(TableName=test_table.name, KeyConditionExpression=Key("p").eq("long") & Key("c").lt("12")):
|
||||
got_items += page['Items']
|
||||
print(got_items)
|
||||
assert multiset([item for item in items if item['p'] == 'long' and item['c'] < '12']) == multiset(got_items)
|
||||
|
||||
def test_begins_with(dynamodb, test_table):
|
||||
paginator = dynamodb.meta.client.get_paginator('query')
|
||||
@@ -459,6 +462,7 @@ def test_query_limit_paging(test_table_sn):
|
||||
# return items sorted in reverse order. Combining this with Limit can
|
||||
# be used to return the last items instead of the first items of the
|
||||
# partition.
|
||||
@pytest.mark.xfail(reason="ScanIndexForward not supported yet")
|
||||
def test_query_reverse(test_table_sn):
|
||||
numbers = [Decimal(i) for i in range(20)]
|
||||
# Insert these numbers, in random order, into one partition:
|
||||
@@ -493,6 +497,7 @@ def test_query_reverse(test_table_sn):
|
||||
|
||||
# Test that paging also works properly with reverse order
|
||||
# (ScanIndexForward=false), i.e., reverse-order queries can be resumed
|
||||
@pytest.mark.xfail(reason="ScanIndexForward not supported yet")
|
||||
def test_query_reverse_paging(test_table_sn):
|
||||
numbers = [Decimal(i) for i in range(20)]
|
||||
# Insert these numbers, in random order, into one partition:
|
||||
@@ -509,11 +514,3 @@ def test_query_reverse_paging(test_table_sn):
|
||||
got_items = full_query(test_table_sn, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, ScanIndexForward=False, Limit=limit)
|
||||
got_sort_keys = [x['c'] for x in got_items]
|
||||
assert got_sort_keys == reversed_numbers
|
||||
|
||||
# A query without a KeyConditions or KeyConditionExpress is, or an empty
|
||||
# one, is obviously not allowed:
|
||||
def test_query_missing_key(test_table):
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
full_query(test_table, KeyConditions={})
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
full_query(test_table)
|
||||
226
alternator-test/test_returnvalues.py
Normal file
226
alternator-test/test_returnvalues.py
Normal file
@@ -0,0 +1,226 @@
|
||||
# Copyright 2019 ScyllaDB
|
||||
#
|
||||
# This file is part of Scylla.
|
||||
#
|
||||
# Scylla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Scylla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# Tests for the ReturnValues parameter for the different update operations
|
||||
# (PutItem, UpdateItem, DeleteItem).
|
||||
|
||||
import pytest
|
||||
from botocore.exceptions import ClientError
|
||||
from util import random_string
|
||||
|
||||
# Test trivial support for the ReturnValues parameter in PutItem, UpdateItem
|
||||
# and DeleteItem - test that "NONE" works (and changes nothing), while a
|
||||
# completely unsupported value gives an error.
|
||||
# This test is useful to check that before the ReturnValues parameter is fully
|
||||
# implemented, it returns an error when a still-unsupported ReturnValues
|
||||
# option is attempted in the request - instead of simply being ignored.
|
||||
def test_trivial_returnvalues(test_table_s):
    # For each of PutItem, UpdateItem and DeleteItem: ReturnValues='NONE' is
    # accepted and yields no "Attributes" in the response, while an unknown
    # ReturnValues setting ('DOG') is rejected with a ValidationException.
    # PutItem:
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
    ret = test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='NONE')
    assert 'Attributes' not in ret
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='DOG')
    # UpdateItem:
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
    ret = test_table_s.update_item(Key={'p': p}, ReturnValues='NONE',
        UpdateExpression='SET b = :val',
        ExpressionAttributeValues={':val': 'cat'})
    assert 'Attributes' not in ret
    # The update itself must be valid, so that the bad ReturnValues is the
    # only possible cause of the error. (Adding a number to the string
    # attribute 'a', as this test used to do, is an invalid update on its own
    # and would not isolate the ReturnValues check.)
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.update_item(Key={'p': p}, ReturnValues='DOG',
            UpdateExpression='SET b = :val',
            ExpressionAttributeValues={':val': 'cat'})
    # DeleteItem:
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
    ret = test_table_s.delete_item(Key={'p': p}, ReturnValues='NONE')
    assert 'Attributes' not in ret
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.delete_item(Key={'p': p}, ReturnValues='DOG')
|
||||
|
||||
# Test the ReturnValues parameter on a PutItem operation. Only two settings
|
||||
# are supported for this parameter for this operation: NONE (the default)
|
||||
# and ALL_OLD.
|
||||
@pytest.mark.xfail(reason="ReturnValues not supported")
def test_put_item_returnvalues(test_table_s):
    # Without ReturnValues, the overwritten item is not reported back:
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
    ret = test_table_s.put_item(Item={'p': p, 'a': 'hello'})
    assert 'Attributes' not in ret
    # An explicit ReturnValues=NONE behaves the same way:
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
    ret = test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='NONE')
    assert 'Attributes' not in ret
    # ReturnValues=ALL_OLD returns the previous content of the item in an
    # "Attributes" member of the response:
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
    ret = test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues='ALL_OLD')
    assert ret['Attributes'] == {'p': p, 'a': 'hi'}
    # Everything else must be rejected by PutItem:
    #  - UPDATED_OLD, ALL_NEW, UPDATED_NEW exist for other operations only;
    #  - "DOG" is not a ReturnValues setting at all;
    #  - "none" shows that the setting is case sensitive.
    for rejected in ('UPDATED_OLD', 'ALL_NEW', 'UPDATED_NEW', 'DOG', 'none'):
        with pytest.raises(ClientError, match='ValidationException'):
            test_table_s.put_item(Item={'p': p, 'a': 'hello'}, ReturnValues=rejected)
|
||||
|
||||
# Test the ReturnValues parameter on a DeleteItem operation. Only two settings
|
||||
# are supported for this parameter for this operation: NONE (the default)
|
||||
# and ALL_OLD.
|
||||
@pytest.mark.xfail(reason="ReturnValues not supported")
def test_delete_item_returnvalues(test_table_s):
    # By default, the previous value of an item is not returned:
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
    ret = test_table_s.delete_item(Key={'p': p})
    assert 'Attributes' not in ret
    # Using ReturnValues=NONE is the same:
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
    ret = test_table_s.delete_item(Key={'p': p}, ReturnValues='NONE')
    assert 'Attributes' not in ret
    # With ReturnValues=ALL_OLD, the old value of the item is returned
    # in an "Attributes" attribute:
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hi'})
    ret = test_table_s.delete_item(Key={'p': p}, ReturnValues='ALL_OLD')
    assert ret['Attributes'] == {'p': p, 'a': 'hi'}
    # Other ReturnValues options - UPDATED_OLD, ALL_NEW, UPDATED_NEW -
    # are supported by other operations but not by DeleteItem. The names
    # must be spelled exactly ("UPDATED_...", not "UPDATE_..."); otherwise
    # this would only repeat the unknown-value check below.
    for unsupported in ('UPDATED_OLD', 'ALL_NEW', 'UPDATED_NEW'):
        with pytest.raises(ClientError, match='ValidationException'):
            test_table_s.delete_item(Key={'p': p}, ReturnValues=unsupported)
    # Also, obviously, a non-supported setting "DOG" results in an error:
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.delete_item(Key={'p': p}, ReturnValues='DOG')
    # The ReturnValues value is case sensitive, so while "NONE" is supported
    # (and tested above), "none" isn't:
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.delete_item(Key={'p': p}, ReturnValues='none')
|
||||
|
||||
# Test the ReturnValues parameter on a UpdateItem operation. All five
|
||||
# settings are supported for this parameter for this operation: NONE
|
||||
# (the default), ALL_OLD, UPDATED_OLD, ALL_NEW and UPDATED_NEW.
|
||||
@pytest.mark.xfail(reason="ReturnValues not supported")
|
||||
def test_update_item_returnvalues(test_table_s):
|
||||
# By default, the previous value of an item is not returned:
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
|
||||
ret=test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET b = :val',
|
||||
ExpressionAttributeValues={':val': 'cat'})
|
||||
assert not 'Attributes' in ret
|
||||
|
||||
# Using ReturnValues=NONE is the same:
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='NONE',
|
||||
UpdateExpression='SET b = :val',
|
||||
ExpressionAttributeValues={':val': 'cat'})
|
||||
assert not 'Attributes' in ret
|
||||
|
||||
# With ReturnValues=ALL_OLD, the entire old value of the item (even
|
||||
# attributes we did not modify) is returned in an "Attributes" attribute:
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='ALL_OLD',
|
||||
UpdateExpression='SET b = :val',
|
||||
ExpressionAttributeValues={':val': 'cat'})
|
||||
assert ret['Attributes'] == {'p': p, 'a': 'hi', 'b': 'dog'}
|
||||
|
||||
# With ReturnValues=UPDATED_OLD, only the overwritten attributes of the
|
||||
# old item are returned in an "Attributes" attribute:
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
|
||||
UpdateExpression='SET b = :val, c = :val2',
|
||||
ExpressionAttributeValues={':val': 'cat', ':val2': 'hello'})
|
||||
assert ret['Attributes'] == {'b': 'dog'}
|
||||
# Even if an update overwrites an attribute by the same value again,
|
||||
# this is considered an update, and the old value (identical to the
|
||||
# new one) is returned:
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
|
||||
UpdateExpression='SET b = :val',
|
||||
ExpressionAttributeValues={':val': 'cat'})
|
||||
assert ret['Attributes'] == {'b': 'cat'}
|
||||
# Deleting an attribute also counts as overwriting it, of course:
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
|
||||
UpdateExpression='REMOVE b')
|
||||
assert ret['Attributes'] == {'b': 'cat'}
|
||||
|
||||
# With ReturnValues=ALL_NEW, the entire new value of the item (including
|
||||
# old attributes we did not modify) is returned:
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='ALL_NEW',
|
||||
UpdateExpression='SET b = :val',
|
||||
ExpressionAttributeValues={':val': 'cat'})
|
||||
assert ret['Attributes'] == {'p': p, 'a': 'hi', 'b': 'cat'}
|
||||
|
||||
# With ReturnValues=UPDATED_NEW, only the new values of the updated
|
||||
# attributes are returned. Note that "updated attributes" means
|
||||
# the newly set attributes - it doesn't require that these attributes
|
||||
# have any previous values
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'a': 'hi', 'b': 'dog'})
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
|
||||
UpdateExpression='SET b = :val, c = :val2',
|
||||
ExpressionAttributeValues={':val': 'cat', ':val2': 'hello'})
|
||||
assert ret['Attributes'] == {'b': 'cat', 'c': 'hello'}
|
||||
# Deleting an attribute also counts as overwriting it, but the deleted
|
||||
# column is not returned in the response - so it's empty in this case.
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
|
||||
UpdateExpression='REMOVE b')
|
||||
assert not 'Attributes' in ret
|
||||
# In the above examples, UPDATED_NEW is not useful because it just
|
||||
# returns the new values we already know from the request... UPDATED_NEW
|
||||
# becomes more useful in read-modify-write operations:
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 'a': 1})
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
|
||||
UpdateExpression='SET a = a + :val',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
assert ret['Attributes'] == {'a': 2}
|
||||
|
||||
# An unsupported setting "DOG" also results in an error:
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p}, ReturnValues='DOG',
|
||||
UpdateExpression='SET a = a + :val',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
# The ReturnValues value is case sensitive, so while "NONE" is supported
|
||||
# (and tested above), "none" isn't:
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p}, ReturnValues='none',
|
||||
UpdateExpression='SET a = a + :val',
|
||||
ExpressionAttributeValues={':val': 1})
|
||||
@@ -42,11 +42,6 @@ def test_scan_basic(filled_test_table):
|
||||
assert len(items) == len(got_items)
|
||||
assert multiset(items) == multiset(got_items)
|
||||
|
||||
def test_scan_nonexistent_table(dynamodb):
|
||||
client = dynamodb.meta.client
|
||||
with pytest.raises(ClientError, match="ResourceNotFoundException"):
|
||||
client.scan(TableName="i_do_not_exist")
|
||||
|
||||
def test_scan_with_paginator(dynamodb, filled_test_table):
|
||||
test_table, items = filled_test_table
|
||||
paginator = dynamodb.meta.client.get_paginator('scan')
|
||||
@@ -244,6 +239,7 @@ def test_scan_select(filled_test_table):
|
||||
# a scan into multiple parts, and that these parts are in fact disjoint,
|
||||
# and their union is the entire contents of the table. We do not actually
|
||||
# try to run these queries in *parallel* in this test.
|
||||
@pytest.mark.xfail(reason="parallel scan not supported yet")
|
||||
def test_scan_parallel(filled_test_table):
|
||||
test_table, items = filled_test_table
|
||||
for nsegments in [1, 2, 17]:
|
||||
@@ -254,14 +250,3 @@ def test_scan_parallel(filled_test_table):
|
||||
# The following comparison verifies that each of the expected item
|
||||
# in items was returned in one - and just one - of the segments.
|
||||
assert multiset(items) == multiset(got_items)
|
||||
|
||||
# Test correct handling of incorrect parallel scan parameters.
|
||||
# Most of the corner cases (like TotalSegments=0) are validated
|
||||
# by boto3 itself, but some checks can still be performed.
|
||||
def test_scan_parallel_incorrect(filled_test_table):
|
||||
test_table, items = filled_test_table
|
||||
with pytest.raises(ClientError, match='ValidationException.*Segment'):
|
||||
full_scan(test_table, TotalSegments=1000001, Segment=0)
|
||||
for segment in [7, 9]:
|
||||
with pytest.raises(ClientError, match='ValidationException.*Segment'):
|
||||
full_scan(test_table, TotalSegments=5, Segment=segment)
|
||||
@@ -74,11 +74,6 @@ def create_and_delete_table(dynamodb, name, **kwargs):
|
||||
def test_create_and_delete_table(dynamodb):
|
||||
create_and_delete_table(dynamodb, 'alternator_test')
|
||||
|
||||
# Test that recreating a table right after deleting it works without issues
|
||||
def test_recreate_table(dynamodb):
|
||||
create_and_delete_table(dynamodb, 'alternator_recr_test')
|
||||
create_and_delete_table(dynamodb, 'alternator_recr_test')
|
||||
|
||||
# DynamoDB documentation specifies that table names must be 3-255 characters,
|
||||
# and match the regex [a-zA-Z0-9._-]+. Names not matching these rules should
|
||||
# be rejected, and no table be created.
|
||||
@@ -232,35 +227,6 @@ def test_create_table_billing_mode_errors(dynamodb, test_table):
|
||||
KeySchema=[{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
|
||||
AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }])
|
||||
|
||||
# Even before Alternator gains full support for the DynamoDB stream API
|
||||
# and CreateTable's StreamSpecification option, we should support the
|
||||
# options which mean it is turned *off*.
|
||||
def test_table_streams_off(dynamodb):
|
||||
# If StreamSpecification is given, but has StreamEnabled=false, it's as
|
||||
# if StreamSpecification was missing. StreamViewType isn't needed.
|
||||
table = create_test_table(dynamodb, StreamSpecification={'StreamEnabled': False},
|
||||
KeySchema=[{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
|
||||
AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }]);
|
||||
table.delete();
|
||||
# DynamoDB doesn't allow StreamSpecification to be empty map - if it
|
||||
# exists, it must have a StreamEnabled
|
||||
# Unfortunately, new versions of boto3 don't let us pass this...
|
||||
#with pytest.raises(ClientError, match='ValidationException'):
|
||||
# table = create_test_table(dynamodb, StreamSpecification={},
|
||||
# KeySchema=[{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
|
||||
# AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }]);
|
||||
# table.delete();
|
||||
# Unfortunately, boto3 doesn't allow us to pass StreamSpecification=None.
|
||||
# This is what we had in issue #5796.
|
||||
|
||||
@pytest.mark.xfail(reason="streams not yet implemented")
|
||||
def test_table_streams_on(dynamodb):
|
||||
table = create_test_table(dynamodb,
|
||||
StreamSpecification={'StreamEnabled': True, 'StreamViewType': 'OLD_IMAGE'},
|
||||
KeySchema=[{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
|
||||
AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }]);
|
||||
table.delete();
|
||||
|
||||
# Our first implementation had a special column name called "attrs" where
|
||||
# we stored a map for all non-key columns. If the user tried to name one
|
||||
# of the key columns with this same name, the result was a disaster - Scylla
|
||||
@@ -370,7 +370,7 @@ def test_update_expression_cannot_modify_key(test_table):
|
||||
|
||||
# Test that trying to start an expression with some nonsense like HELLO
|
||||
# instead of SET, REMOVE, ADD or DELETE, fails.
|
||||
def test_update_expression_non_existent_clause(test_table_s):
|
||||
def test_update_expression_non_existant_clause(test_table_s):
|
||||
p = random_string()
|
||||
with pytest.raises(ClientError, match='ValidationException'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
@@ -717,28 +717,10 @@ def test_update_expression_delete_sets(test_table_s):
|
||||
UpdateExpression='DELETE a :val1',
|
||||
ExpressionAttributeValues={':val1': set(['pig'])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == set(['dog'])
|
||||
# Deleting all the elements cannot leave an empty set (which isn't
|
||||
# supported). Rather, it deletes the attribute altogether:
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='DELETE a :val1',
|
||||
ExpressionAttributeValues={':val1': set(['dog'])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'b': 'hi'}
|
||||
# Deleting elements from a non-existent attribute is allowed, and
|
||||
# simply does nothing:
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='DELETE a :val1',
|
||||
ExpressionAttributeValues={':val1': set(['dog'])})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'b': 'hi'}
|
||||
# An empty set parameter is not allowed
|
||||
with pytest.raises(ClientError, match='ValidationException.*empty'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='DELETE a :val1',
|
||||
ExpressionAttributeValues={':val1': set([])})
|
||||
# The value to be deleted must be a set of the same type - it can't
|
||||
# be a single element or anything else. If the value has the wrong type,
|
||||
# we get an error like "Invalid UpdateExpression: Incorrect operand type
|
||||
# for operator or function; operator: DELETE, operand type: STRING".
|
||||
test_table_s.put_item(Item={'p': p, 'a': set(['dog', 'cat', 'mouse']), 'b': 'hi'})
|
||||
with pytest.raises(ClientError, match='ValidationException.*type'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='DELETE a :val1',
|
||||
@@ -870,25 +852,3 @@ def test_nested_attribute_update_bad_path_array(test_table_s):
|
||||
with pytest.raises(ClientError, match='ValidationException.*path'):
|
||||
test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a[0] = :val1',
|
||||
ExpressionAttributeValues={':val1': 7})
|
||||
|
||||
# DynamoDB does not allow empty strings, empty byte arrays, or empty sets.
|
||||
# Trying to ask UpdateItem to put one of these in an attribute should be
|
||||
# forbidden. Empty lists and maps *are* allowed.
|
||||
# Note that in test_item.py::test_update_item_empty_attribute we checked
|
||||
# this with the AttributeUpdates syntax. Here we check the same with the
|
||||
# UpdateExpression syntax.
|
||||
def test_update_expression_empty_attribute(test_table_s):
|
||||
p = random_string()
|
||||
# Empty string, byte array and set are *not* allowed
|
||||
for v in ['', bytearray('', 'utf-8'), set()]:
|
||||
with pytest.raises(ClientError, match='ValidationException.*empty'):
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET a = :v',
|
||||
ExpressionAttributeValues={':v': v})
|
||||
assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True)
|
||||
# But empty lists and maps *are* allowed:
|
||||
test_table_s.update_item(Key={'p': p},
|
||||
UpdateExpression='SET d = :v1, e = :v2',
|
||||
ExpressionAttributeValues={':v1': [], ':v2': {}})
|
||||
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'd': [], 'e': {}}
|
||||
#
|
||||
@@ -84,9 +84,8 @@ def freeze(item):
|
||||
def multiset(items):
|
||||
return collections.Counter([freeze(item) for item in items])
|
||||
|
||||
# NOTE: alternator_Test prefix contains a capital letter on purpose,
|
||||
#in order to validate case sensitivity in alternator
|
||||
test_table_prefix = 'alternator_Test_'
|
||||
|
||||
test_table_prefix = 'alternator_test_'
|
||||
def test_table_name():
|
||||
current_ms = int(round(time.time() * 1000))
|
||||
# In the off chance that test_table_name() is called twice in the same millisecond...
|
||||
@@ -130,7 +130,7 @@ future<std::string> get_key_from_roles(cql3::query_processor& qp, std::string us
|
||||
|
||||
auto cl = auth::password_authenticator::consistency_for_user(username);
|
||||
auto timeout = auth::internal_distributed_timeout_config();
|
||||
return qp.execute_internal(query, cl, timeout, {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||
return qp.process(query, cl, timeout, {sstring(username)}, true).then_wrapped([username = std::move(username)] (future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||
auto res = f.get0();
|
||||
auto salted_hash = std::optional<sstring>();
|
||||
if (res->empty()) {
|
||||
|
||||
@@ -30,11 +30,6 @@
|
||||
#include "serialization.hh"
|
||||
#include "base64.hh"
|
||||
#include <stdexcept>
|
||||
#include <boost/algorithm/cxx11/all_of.hpp>
|
||||
#include <boost/algorithm/cxx11/any_of.hpp>
|
||||
#include "utils/overloaded_functor.hh"
|
||||
|
||||
#include "expressions_eval.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
@@ -76,7 +71,7 @@ static ::shared_ptr<cql3::restrictions::single_column_restriction::contains> mak
|
||||
}
|
||||
|
||||
static ::shared_ptr<cql3::restrictions::single_column_restriction::EQ> make_key_eq_restriction(const column_definition& cdef, const rjson::value& value) {
|
||||
bytes raw_value = get_key_from_typed_value(value, cdef);
|
||||
bytes raw_value = get_key_from_typed_value(value, cdef, type_to_string(cdef.type));
|
||||
auto restriction_value = ::make_shared<cql3::constants::value>(cql3::raw_value::make_value(std::move(raw_value)));
|
||||
return make_shared<cql3::restrictions::single_column_restriction::EQ>(cdef, std::move(restriction_value));
|
||||
}
|
||||
@@ -230,12 +225,16 @@ static bool check_BEGINS_WITH(const rjson::value* v1, const rjson::value& v2) {
|
||||
}
|
||||
}
|
||||
|
||||
static std::string_view to_string_view(const rjson::value& v) {
|
||||
return std::string_view(v.GetString(), v.GetStringLength());
|
||||
}
|
||||
|
||||
static bool is_set_of(const rjson::value& type1, const rjson::value& type2) {
|
||||
return (type2 == "S" && type1 == "SS") || (type2 == "N" && type1 == "NS") || (type2 == "B" && type1 == "BS");
|
||||
}
|
||||
|
||||
// Check if two JSON-encoded values match with the CONTAINS relation
|
||||
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2) {
|
||||
static bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2) {
|
||||
if (!v1) {
|
||||
return false;
|
||||
}
|
||||
@@ -247,7 +246,7 @@ bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2) {
|
||||
"got {} instead", kv2.name));
|
||||
}
|
||||
if (kv1.name == "S" && kv2.name == "S") {
|
||||
return rjson::to_string_view(kv1.value).find(rjson::to_string_view(kv2.value)) != std::string_view::npos;
|
||||
return to_string_view(kv1.value).find(to_string_view(kv2.value)) != std::string_view::npos;
|
||||
} else if (kv1.name == "B" && kv2.name == "B") {
|
||||
return base64_decode(kv1.value).find(base64_decode(kv2.value)) != bytes::npos;
|
||||
} else if (is_set_of(kv1.name, kv2.name)) {
|
||||
@@ -307,19 +306,6 @@ static bool check_IN(const rjson::value* val, const rjson::value& array) {
|
||||
return have_match;
|
||||
}
|
||||
|
||||
// Another variant of check_IN, this one for ConditionExpression. It needs to
|
||||
// check whether the first element in the given vector is equal to any of the
|
||||
// others.
|
||||
static bool check_IN(const std::vector<rjson::value>& array) {
|
||||
const rjson::value* first = &array[0];
|
||||
for (unsigned i = 1; i < array.size(); i++) {
|
||||
if (check_EQ(first, array[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool check_NULL(const rjson::value* val) {
|
||||
return val == nullptr;
|
||||
}
|
||||
@@ -365,35 +351,31 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
|
||||
|
||||
struct cmp_lt {
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs; }
|
||||
// We cannot use the normal comparison operators like "<" on the bytes
|
||||
// type, because they treat individual bytes as signed but we need to
|
||||
// compare them as *unsigned*. So we need a specialization for bytes.
|
||||
bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) < 0; }
|
||||
static constexpr const char* diagnostic = "LT operator";
|
||||
};
|
||||
|
||||
struct cmp_le {
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs <= rhs; }
|
||||
bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) <= 0; }
|
||||
// bytes only has <, so we cannot use <=.
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs || lhs == rhs; }
|
||||
static constexpr const char* diagnostic = "LE operator";
|
||||
};
|
||||
|
||||
struct cmp_ge {
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs >= rhs; }
|
||||
bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) >= 0; }
|
||||
// bytes only has <, so we cannot use >=.
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return rhs < lhs || lhs == rhs; }
|
||||
static constexpr const char* diagnostic = "GE operator";
|
||||
};
|
||||
|
||||
struct cmp_gt {
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return lhs > rhs; }
|
||||
bool operator()(const bytes& lhs, const bytes& rhs) const { return compare_unsigned(lhs, rhs) > 0; }
|
||||
// bytes only has <, so we cannot use >.
|
||||
template <typename T> bool operator()(const T& lhs, const T& rhs) const { return rhs < lhs; }
|
||||
static constexpr const char* diagnostic = "GT operator";
|
||||
};
|
||||
|
||||
// True if v is between lb and ub, inclusive. Throws if lb > ub.
|
||||
template <typename T>
|
||||
bool check_BETWEEN(const T& v, const T& lb, const T& ub) {
|
||||
if (cmp_lt()(ub, lb)) {
|
||||
if (ub < lb) {
|
||||
throw api_error("ValidationException",
|
||||
format("BETWEEN operator requires lower_bound <= upper_bound, but {} > {}", lb, ub));
|
||||
}
|
||||
@@ -523,15 +505,16 @@ static bool verify_expected_one(const rjson::value& condition, const rjson::valu
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the existing values of the item (previous_item) match the
|
||||
// Verify that the existing values of the item (previous_item) match the
|
||||
// conditions given by the Expected and ConditionalOperator parameters
|
||||
// (if they exist) in the request (an UpdateItem, PutItem or DeleteItem).
|
||||
// This function can throw a ValidationException API error if there
|
||||
// This function will throw a ConditionalCheckFailedException API error
|
||||
// if the values do not match the condition, or ValidationException if there
|
||||
// are errors in the format of the condition itself.
|
||||
bool verify_expected(const rjson::value& req, const std::unique_ptr<rjson::value>& previous_item) {
|
||||
void verify_expected(const rjson::value& req, const std::unique_ptr<rjson::value>& previous_item) {
|
||||
const rjson::value* expected = rjson::find(req, "Expected");
|
||||
if (!expected) {
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
if (!expected->IsObject()) {
|
||||
throw api_error("ValidationException", "'Expected' parameter, if given, must be an object");
|
||||
@@ -560,123 +543,22 @@ bool verify_expected(const rjson::value& req, const std::unique_ptr<rjson::value
|
||||
for (auto it = expected->MemberBegin(); it != expected->MemberEnd(); ++it) {
|
||||
const rjson::value* got = nullptr;
|
||||
if (previous_item && previous_item->IsObject() && previous_item->HasMember("Item")) {
|
||||
got = rjson::find((*previous_item)["Item"], rjson::to_string_view(it->name));
|
||||
got = rjson::find((*previous_item)["Item"], rjson::string_ref_type(it->name.GetString()));
|
||||
}
|
||||
bool success = verify_expected_one(it->value, got);
|
||||
if (success && !require_all) {
|
||||
// When !require_all, one success is enough!
|
||||
return true;
|
||||
return;
|
||||
} else if (!success && require_all) {
|
||||
// When require_all, one failure is enough!
|
||||
return false;
|
||||
throw api_error("ConditionalCheckFailedException", "Failed condition.");
|
||||
}
|
||||
}
|
||||
// If we got here and require_all, none of the checks failed, so succeed.
|
||||
// If we got here and !require_all, all of the checks failed, so fail.
|
||||
return require_all;
|
||||
}
|
||||
|
||||
bool calculate_primitive_condition(const parsed::primitive_condition& cond,
|
||||
std::unordered_set<std::string>& used_attribute_values,
|
||||
std::unordered_set<std::string>& used_attribute_names,
|
||||
const rjson::value& req,
|
||||
schema_ptr schema,
|
||||
const std::unique_ptr<rjson::value>& previous_item) {
|
||||
std::vector<rjson::value> calculated_values;
|
||||
calculated_values.reserve(cond._values.size());
|
||||
for (const parsed::value& v : cond._values) {
|
||||
calculated_values.push_back(calculate_value(v,
|
||||
cond._op == parsed::primitive_condition::type::VALUE ?
|
||||
calculate_value_caller::ConditionExpressionAlone :
|
||||
calculate_value_caller::ConditionExpression,
|
||||
rjson::find(req, "ExpressionAttributeValues"),
|
||||
used_attribute_names, used_attribute_values,
|
||||
req, schema, previous_item));
|
||||
}
|
||||
switch (cond._op) {
|
||||
case parsed::primitive_condition::type::BETWEEN:
|
||||
if (calculated_values.size() != 3) {
|
||||
// Shouldn't happen unless we have a bug in the parser
|
||||
throw std::logic_error(format("Wrong number of values {} in BETWEEN primitive_condition", cond._values.size()));
|
||||
}
|
||||
return check_BETWEEN(&calculated_values[0], calculated_values[1], calculated_values[2]);
|
||||
case parsed::primitive_condition::type::IN:
|
||||
return check_IN(calculated_values);
|
||||
case parsed::primitive_condition::type::VALUE:
|
||||
if (calculated_values.size() != 1) {
|
||||
// Shouldn't happen unless we have a bug in the parser
|
||||
throw std::logic_error(format("Unexpected values in primitive_condition", cond._values.size()));
|
||||
}
|
||||
// Unwrap the boolean wrapped as the value (if it is a boolean)
|
||||
if (calculated_values[0].IsObject() && calculated_values[0].MemberCount() == 1) {
|
||||
auto it = calculated_values[0].MemberBegin();
|
||||
if (it->name == "BOOL" && it->value.IsBool()) {
|
||||
return it->value.GetBool();
|
||||
}
|
||||
}
|
||||
throw api_error("ValidationException",
|
||||
format("ConditionExpression: condition results in a non-boolean value: {}",
|
||||
calculated_values[0]));
|
||||
default:
|
||||
// All the rest of the operators have exactly two parameters (and unless
|
||||
// we have a bug in the parser, that's what we have in the parsed object):
|
||||
if (calculated_values.size() != 2) {
|
||||
throw std::logic_error(format("Wrong number of values {} in primitive_condition object", cond._values.size()));
|
||||
}
|
||||
}
|
||||
switch (cond._op) {
|
||||
case parsed::primitive_condition::type::EQ:
|
||||
return check_EQ(&calculated_values[0], calculated_values[1]);
|
||||
case parsed::primitive_condition::type::NE:
|
||||
return check_NE(&calculated_values[0], calculated_values[1]);
|
||||
case parsed::primitive_condition::type::GT:
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_gt{});
|
||||
case parsed::primitive_condition::type::GE:
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_ge{});
|
||||
case parsed::primitive_condition::type::LT:
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_lt{});
|
||||
case parsed::primitive_condition::type::LE:
|
||||
return check_compare(&calculated_values[0], calculated_values[1], cmp_le{});
|
||||
default:
|
||||
// Shouldn't happen unless we have a bug in the parser
|
||||
throw std::logic_error(format("Unknown type {} in primitive_condition object", (int)(cond._op)));
|
||||
if (!require_all) {
|
||||
throw api_error("ConditionalCheckFailedException", "None of ORed Expect conditions were successful.");
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the existing values of the item (previous_item) match the
|
||||
// conditions given by the given parsed ConditionExpression.
|
||||
bool verify_condition_expression(
|
||||
const parsed::condition_expression& condition_expression,
|
||||
std::unordered_set<std::string>& used_attribute_values,
|
||||
std::unordered_set<std::string>& used_attribute_names,
|
||||
const rjson::value& req,
|
||||
schema_ptr schema,
|
||||
const std::unique_ptr<rjson::value>& previous_item) {
|
||||
if (condition_expression.empty()) {
|
||||
return true;
|
||||
}
|
||||
bool ret = std::visit(overloaded_functor {
|
||||
[&] (const parsed::primitive_condition& cond) -> bool {
|
||||
return calculate_primitive_condition(cond, used_attribute_values,
|
||||
used_attribute_names, req, schema, previous_item);
|
||||
},
|
||||
[&] (const parsed::condition_expression::condition_list& list) -> bool {
|
||||
auto verify_condition = [&] (const parsed::condition_expression& e) {
|
||||
return verify_condition_expression(e, used_attribute_values,
|
||||
used_attribute_names, req, schema, previous_item);
|
||||
};
|
||||
switch (list.op) {
|
||||
case '&':
|
||||
return boost::algorithm::all_of(list.conditions, verify_condition);
|
||||
case '|':
|
||||
return boost::algorithm::any_of(list.conditions, verify_condition);
|
||||
default:
|
||||
// Shouldn't happen unless we have a bug in the parser
|
||||
throw std::logic_error("bad operator in condition_list");
|
||||
}
|
||||
}
|
||||
}, condition_expression._expression);
|
||||
return condition_expression._negated ? !ret : ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -44,6 +44,6 @@ comparison_operator_type get_comparison_operator(const rjson::value& comparison_
|
||||
|
||||
::shared_ptr<cql3::restrictions::statement_restrictions> get_filtering_restrictions(schema_ptr schema, const column_definition& attrs_col, const rjson::value& query_filter);
|
||||
|
||||
bool verify_expected(const rjson::value& req, const std::unique_ptr<rjson::value>& previous_item);
|
||||
void verify_expected(const rjson::value& req, const std::unique_ptr<rjson::value>& previous_item);
|
||||
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -25,56 +25,45 @@
|
||||
#include <seastar/http/httpd.hh>
|
||||
#include "seastarx.hh"
|
||||
#include <seastar/json/json_elements.hh>
|
||||
#include <seastar/core/sharded.hh>
|
||||
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/client_state.hh"
|
||||
|
||||
#include "alternator/error.hh"
|
||||
#include "stats.hh"
|
||||
#include "rjson.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
class executor : public peering_sharded_service<executor> {
|
||||
class executor {
|
||||
service::storage_proxy& _proxy;
|
||||
service::migration_manager& _mm;
|
||||
// An smp_service_group to be used for limiting the concurrency when
|
||||
// forwarding Alternator requests between shards - if necessary for LWT.
|
||||
smp_service_group _ssg;
|
||||
|
||||
public:
|
||||
using client_state = service::client_state;
|
||||
using request_return_type = std::variant<json::json_return_type, api_error>;
|
||||
stats _stats;
|
||||
static constexpr auto ATTRS_COLUMN_NAME = ":attrs";
|
||||
static constexpr auto KEYSPACE_NAME_PREFIX = "alternator_";
|
||||
static constexpr auto KEYSPACE_NAME = "alternator";
|
||||
|
||||
executor(service::storage_proxy& proxy, service::migration_manager& mm, smp_service_group ssg)
|
||||
: _proxy(proxy), _mm(mm), _ssg(ssg) {}
|
||||
executor(service::storage_proxy& proxy, service::migration_manager& mm) : _proxy(proxy), _mm(mm) {}
|
||||
|
||||
future<request_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> delete_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> put_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> delete_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> update_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> list_tables(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> scan(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> describe_endpoints(client_state& client_state, service_permit permit, rjson::value request, std::string host_header);
|
||||
future<request_return_type> batch_write_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> query(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> tag_resource(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> untag_resource(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> list_tags_of_resource(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<json::json_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> delete_table(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> put_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> get_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> delete_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> update_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> list_tables(client_state& client_state, std::string content);
|
||||
future<json::json_return_type> scan(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> describe_endpoints(client_state& client_state, std::string content, std::string host_header);
|
||||
future<json::json_return_type> batch_write_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
future<json::json_return_type> query(client_state& client_state, tracing::trace_state_ptr trace_state, std::string content);
|
||||
|
||||
future<> start();
|
||||
future<> stop() { return make_ready_future<>(); }
|
||||
|
||||
future<> create_keyspace(std::string_view keyspace_name);
|
||||
future<> maybe_create_keyspace();
|
||||
|
||||
static tracing::trace_state_ptr maybe_trace_query(client_state& client_state, sstring_view op, sstring_view query);
|
||||
};
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
#include "expressions.hh"
|
||||
#include "alternator/expressionsLexer.hpp"
|
||||
#include "alternator/expressionsParser.hpp"
|
||||
#include "utils/overloaded_functor.hh"
|
||||
|
||||
#include <seastarx.hh>
|
||||
|
||||
@@ -66,19 +65,13 @@ parse_projection_expression(std::string query) {
|
||||
}
|
||||
}
|
||||
|
||||
parsed::condition_expression
|
||||
parse_condition_expression(std::string query) {
|
||||
try {
|
||||
return do_with_parser(query, std::mem_fn(&expressionsParser::condition_expression));
|
||||
} catch (...) {
|
||||
throw expressions_syntax_error(format("Failed parsing ConditionExpression '{}': {}", query, std::current_exception()));
|
||||
}
|
||||
}
|
||||
template<class... Ts> struct overloaded : Ts... { using Ts::operator()...; };
|
||||
template<class... Ts> overloaded(Ts...) -> overloaded<Ts...>;
|
||||
|
||||
namespace parsed {
|
||||
|
||||
void update_expression::add(update_expression::action a) {
|
||||
std::visit(overloaded_functor {
|
||||
std::visit(overloaded {
|
||||
[&] (action::set&) { seen_set = true; },
|
||||
[&] (action::remove&) { seen_remove = true; },
|
||||
[&] (action::add&) { seen_add = true; },
|
||||
@@ -101,27 +94,5 @@ void update_expression::append(update_expression other) {
|
||||
seen_del |= other.seen_del;
|
||||
}
|
||||
|
||||
void condition_expression::append(condition_expression&& a, char op) {
|
||||
std::visit(overloaded_functor {
|
||||
[&] (condition_list& x) {
|
||||
// If 'a' has a single condition, we could, instead of inserting
|
||||
// it, insert its single condition (possibly negated if a._negated).
|
||||
// But considering that we don't evaluate these expressions many
|
||||
// times, this optimization is not worth extra code complexity.
|
||||
if (!x.conditions.empty() && x.op != op) {
|
||||
// Shouldn't happen unless we have a bug in the parser
|
||||
throw std::logic_error("condition_expression::append called with mixed operators");
|
||||
}
|
||||
x.conditions.push_back(std::move(a));
|
||||
x.op = op;
|
||||
},
|
||||
[&] (primitive_condition& x) {
|
||||
// Shouldn't happen unless we have a bug in the parser
|
||||
throw std::logic_error("condition_expression::append called on primitive_condition");
|
||||
}
|
||||
}, _expression);
|
||||
}
|
||||
|
||||
|
||||
} // namespace parsed
|
||||
} // namespace alternator
|
||||
|
||||
@@ -145,12 +145,6 @@ REMOVE: R E M O V E;
|
||||
ADD: A D D;
|
||||
DELETE: D E L E T E;
|
||||
|
||||
AND: A N D;
|
||||
OR: O R;
|
||||
NOT: N O T;
|
||||
BETWEEN: B E T W E E N;
|
||||
IN: I N;
|
||||
|
||||
fragment ALPHA: 'A'..'Z' | 'a'..'z';
|
||||
fragment DIGIT: '0'..'9';
|
||||
fragment ALNUM: ALPHA | DIGIT | '_';
|
||||
@@ -171,19 +165,19 @@ path returns [parsed::path p]:
|
||||
| '[' INTEGER ']' { $p.add_index(std::stoi($INTEGER.text)); }
|
||||
)*;
|
||||
|
||||
value returns [parsed::value v]:
|
||||
VALREF { $v.set_valref($VALREF.text); }
|
||||
| path { $v.set_path($path.p); }
|
||||
| NAME { $v.set_func_name($NAME.text); }
|
||||
'(' x=value { $v.add_func_parameter($x.v); }
|
||||
(',' x=value { $v.add_func_parameter($x.v); })*
|
||||
update_expression_set_value returns [parsed::value v]:
|
||||
VALREF { $v.set_valref($VALREF.text); }
|
||||
| path { $v.set_path($path.p); }
|
||||
| NAME { $v.set_func_name($NAME.text); }
|
||||
'(' x=update_expression_set_value { $v.add_func_parameter($x.v); }
|
||||
(',' x=update_expression_set_value { $v.add_func_parameter($x.v); })*
|
||||
')'
|
||||
;
|
||||
|
||||
update_expression_set_rhs returns [parsed::set_rhs rhs]:
|
||||
v=value { $rhs.set_value(std::move($v.v)); }
|
||||
( '+' v=value { $rhs.set_plus(std::move($v.v)); }
|
||||
| '-' v=value { $rhs.set_minus(std::move($v.v)); }
|
||||
v=update_expression_set_value { $rhs.set_value(std::move($v.v)); }
|
||||
( '+' v=update_expression_set_value { $rhs.set_plus(std::move($v.v)); }
|
||||
| '-' v=update_expression_set_value { $rhs.set_minus(std::move($v.v)); }
|
||||
)?
|
||||
;
|
||||
|
||||
@@ -218,48 +212,3 @@ update_expression returns [parsed::update_expression e]:
|
||||
projection_expression returns [std::vector<parsed::path> v]:
|
||||
p=path { $v.push_back(std::move($p.p)); }
|
||||
(',' p=path { $v.push_back(std::move($p.p)); } )* EOF;
|
||||
|
||||
|
||||
primitive_condition returns [parsed::primitive_condition c]:
|
||||
v=value { $c.add_value(std::move($v.v));
|
||||
$c.set_operator(parsed::primitive_condition::type::VALUE); }
|
||||
( ( '=' { $c.set_operator(parsed::primitive_condition::type::EQ); }
|
||||
| '<' '>' { $c.set_operator(parsed::primitive_condition::type::NE); }
|
||||
| '<' { $c.set_operator(parsed::primitive_condition::type::LT); }
|
||||
| '<' '=' { $c.set_operator(parsed::primitive_condition::type::LE); }
|
||||
| '>' { $c.set_operator(parsed::primitive_condition::type::GT); }
|
||||
| '>' '=' { $c.set_operator(parsed::primitive_condition::type::GE); }
|
||||
)
|
||||
v=value { $c.add_value(std::move($v.v)); }
|
||||
| BETWEEN { $c.set_operator(parsed::primitive_condition::type::BETWEEN); }
|
||||
v=value { $c.add_value(std::move($v.v)); }
|
||||
AND
|
||||
v=value { $c.add_value(std::move($v.v)); }
|
||||
| IN '(' { $c.set_operator(parsed::primitive_condition::type::IN); }
|
||||
v=value { $c.add_value(std::move($v.v)); }
|
||||
(',' v=value { $c.add_value(std::move($v.v)); })*
|
||||
')'
|
||||
)?
|
||||
;
|
||||
|
||||
// The following rules for parsing boolean expressions are verbose and
|
||||
// somewhat strange because of Antlr 3's limitations on recursive rules,
|
||||
// common rule prefixes, and (lack of) support for operator precedence.
|
||||
// These rules could have been written more clearly using a more powerful
|
||||
// parser generator - such as Yacc.
|
||||
boolean_expression returns [parsed::condition_expression e]:
|
||||
b=boolean_expression_1 { $e.append(std::move($b.e), '|'); }
|
||||
(OR b=boolean_expression_1 { $e.append(std::move($b.e), '|'); } )*
|
||||
;
|
||||
boolean_expression_1 returns [parsed::condition_expression e]:
|
||||
b=boolean_expression_2 { $e.append(std::move($b.e), '&'); }
|
||||
(AND b=boolean_expression_2 { $e.append(std::move($b.e), '&'); } )*
|
||||
;
|
||||
boolean_expression_2 returns [parsed::condition_expression e]:
|
||||
p=primitive_condition { $e.set_primitive(std::move($p.c)); }
|
||||
| NOT b=boolean_expression_2 { $e = std::move($b.e); $e.apply_not(); }
|
||||
| '(' b=boolean_expression ')' { $e = std::move($b.e); }
|
||||
;
|
||||
|
||||
condition_expression returns [parsed::condition_expression e]:
|
||||
boolean_expression { e=std::move($boolean_expression.e); } EOF;
|
||||
|
||||
@@ -36,6 +36,6 @@ public:
|
||||
|
||||
parsed::update_expression parse_update_expression(std::string query);
|
||||
std::vector<parsed::path> parse_projection_expression(std::string query);
|
||||
parsed::condition_expression parse_condition_expression(std::string query);
|
||||
|
||||
|
||||
} /* namespace alternator */
|
||||
|
||||
@@ -1,78 +0,0 @@
|
||||
/*
|
||||
* Copyright 2020 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "rjson.hh"
|
||||
#include "schema_fwd.hh"
|
||||
|
||||
#include "expressions_types.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
// calculate_value() behaves slightly differently (in particular, different
|
||||
// functions supported) when used in different types of expressions, as
|
||||
// enumerated in this enum:
|
||||
enum class calculate_value_caller {
|
||||
UpdateExpression, ConditionExpression, ConditionExpressionAlone
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, calculate_value_caller caller) {
|
||||
switch (caller) {
|
||||
case calculate_value_caller::UpdateExpression:
|
||||
out << "UpdateExpression";
|
||||
break;
|
||||
case calculate_value_caller::ConditionExpression:
|
||||
out << "ConditionExpression";
|
||||
break;
|
||||
case calculate_value_caller::ConditionExpressionAlone:
|
||||
out << "ConditionExpression";
|
||||
break;
|
||||
default:
|
||||
out << "unknown type of expression";
|
||||
break;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2);
|
||||
|
||||
rjson::value calculate_value(const parsed::value& v,
|
||||
calculate_value_caller caller,
|
||||
const rjson::value* expression_attribute_values,
|
||||
std::unordered_set<std::string>& used_attribute_names,
|
||||
std::unordered_set<std::string>& used_attribute_values,
|
||||
const rjson::value& update_info,
|
||||
schema_ptr schema,
|
||||
const std::unique_ptr<rjson::value>& previous_item);
|
||||
|
||||
bool verify_condition_expression(
|
||||
const parsed::condition_expression& condition_expression,
|
||||
std::unordered_set<std::string>& used_attribute_values,
|
||||
std::unordered_set<std::string>& used_attribute_names,
|
||||
const rjson::value& req,
|
||||
schema_ptr schema,
|
||||
const std::unique_ptr<rjson::value>& previous_item);
|
||||
|
||||
} /* namespace alternator */
|
||||
@@ -88,15 +88,6 @@ struct value {
|
||||
void add_func_parameter(value v) {
|
||||
std::get<function_call>(_value)._parameters.emplace_back(std::move(v));
|
||||
}
|
||||
bool is_valref() const {
|
||||
return std::holds_alternative<std::string>(_value);
|
||||
}
|
||||
bool is_path() const {
|
||||
return std::holds_alternative<path>(_value);
|
||||
}
|
||||
bool is_func() const {
|
||||
return std::holds_alternative<function_call>(_value);
|
||||
}
|
||||
};
|
||||
|
||||
// The right-hand-side of a SET in an update expression can be either a
|
||||
@@ -171,58 +162,5 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// A primitive_condition is a condition expression involving one condition,
|
||||
// while the full condition_expression below adds boolean logic over these
|
||||
// primitive conditions.
|
||||
// The supported primitive conditions are:
|
||||
// 1. Binary operators - v1 OP v2, where OP is =, <>, <, <=, >, or >= and
|
||||
// v1 and v2 are values - from the item (an attribute path), the query
|
||||
// (a ":val" reference), or a function of the above (only the size()
|
||||
// function is supported).
|
||||
// 2. Ternary operator - v1 BETWEEN v2 and v3 (means v1 >= v2 AND v1 <= v3).
|
||||
// 3. N-ary operator - v1 IN ( v2, v3, ... )
|
||||
// 4. A single function call (attribute_exists etc.). The parser actually
|
||||
// accepts a more general "value" here but later stages reject a value
|
||||
// which is not a function call (because DynamoDB does it too).
|
||||
class primitive_condition {
|
||||
public:
|
||||
enum class type {
|
||||
UNDEFINED, VALUE, EQ, NE, LT, LE, GT, GE, BETWEEN, IN
|
||||
};
|
||||
type _op = type::UNDEFINED;
|
||||
std::vector<value> _values;
|
||||
void set_operator(type op) {
|
||||
_op = op;
|
||||
}
|
||||
void add_value(value&& v) {
|
||||
_values.push_back(std::move(v));
|
||||
}
|
||||
bool empty() const {
|
||||
return _op == type::UNDEFINED;
|
||||
}
|
||||
};
|
||||
|
||||
class condition_expression {
|
||||
public:
|
||||
bool _negated = false; // If true, the entire condition is negated
|
||||
struct condition_list {
|
||||
char op = '|'; // '&' or '|'
|
||||
std::vector<condition_expression> conditions;
|
||||
};
|
||||
std::variant<primitive_condition, condition_list> _expression = condition_list();
|
||||
|
||||
void set_primitive(primitive_condition&& p) {
|
||||
_expression = std::move(p);
|
||||
}
|
||||
void append(condition_expression&& c, char op);
|
||||
void apply_not() {
|
||||
_negated = !_negated;
|
||||
}
|
||||
bool empty() const {
|
||||
return std::holds_alternative<condition_list>(_expression) &&
|
||||
std::get<condition_list>(_expression).conditions.empty();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace parsed
|
||||
} // namespace alternator
|
||||
|
||||
@@ -22,108 +22,14 @@
|
||||
#include "rjson.hh"
|
||||
#include "error.hh"
|
||||
#include <seastar/core/print.hh>
|
||||
#include <seastar/core/thread.hh>
|
||||
|
||||
namespace rjson {
|
||||
|
||||
static allocator the_allocator;
|
||||
|
||||
/*
|
||||
* This wrapper class adds nested level checks to rapidjson's handlers.
|
||||
* Each rapidjson handler implements functions for accepting JSON values,
|
||||
* which includes strings, numbers, objects, arrays, etc.
|
||||
* Parsing objects and arrays needs to be performed carefully with regard
|
||||
* to stack overflow - each object/array layer adds another stack frame
|
||||
* to parsing, printing and destroying the parent JSON document.
|
||||
* To prevent stack overflow, a rapidjson handler can be wrapped with
|
||||
* guarded_yieldable_json_handler, which accepts an additional max_nested_level parameter.
|
||||
* After trying to exceed the max nested level, a proper rjson::error will be thrown.
|
||||
*/
|
||||
template<typename Handler, bool EnableYield>
|
||||
struct guarded_yieldable_json_handler : public Handler {
|
||||
size_t _nested_level = 0;
|
||||
size_t _max_nested_level;
|
||||
public:
|
||||
using handler_base = Handler;
|
||||
|
||||
explicit guarded_yieldable_json_handler(size_t max_nested_level) : _max_nested_level(max_nested_level) {}
|
||||
guarded_yieldable_json_handler(string_buffer& buf, size_t max_nested_level)
|
||||
: handler_base(buf), _max_nested_level(max_nested_level) {}
|
||||
|
||||
void Parse(const char* str, size_t length) {
|
||||
rapidjson::MemoryStream ms(static_cast<const char*>(str), length * sizeof(typename encoding::Ch));
|
||||
rapidjson::EncodedInputStream<encoding, rapidjson::MemoryStream> is(ms);
|
||||
rapidjson::GenericReader<encoding, encoding, allocator> reader(&the_allocator);
|
||||
reader.Parse(is, *this);
|
||||
if (reader.HasParseError()) {
|
||||
throw rjson::error(format("Parsing JSON failed: {}", rapidjson::GetParseError_En(reader.GetParseErrorCode())));
|
||||
}
|
||||
//NOTICE: The handler has parsed the string, but in case of rapidjson::GenericDocument
|
||||
// the data now resides in an internal stack_ variable, which is private instead of
|
||||
// protected... which means we cannot simply access its data. Fortunately, another
|
||||
// function for populating documents from SAX events can be abused to extract the data
|
||||
// from the stack via gadget-oriented programming - we use an empty event generator
|
||||
// which does nothing, and use it to call Populate(), which assumes that the generator
|
||||
// will fill the stack with something. It won't, but our stack is already filled with
|
||||
// data we want to steal, so once Populate() ends, our document will be properly parsed.
|
||||
// A proper solution could be programmed once rapidjson declares this stack_ variable
|
||||
// as protected instead of private, so that this class can access it.
|
||||
auto dummy_generator = [](handler_base&){return true;};
|
||||
handler_base::Populate(dummy_generator);
|
||||
}
|
||||
|
||||
bool StartObject() {
|
||||
++_nested_level;
|
||||
check_nested_level();
|
||||
maybe_yield();
|
||||
return handler_base::StartObject();
|
||||
}
|
||||
|
||||
bool EndObject(rapidjson::SizeType elements_count = 0) {
|
||||
--_nested_level;
|
||||
return handler_base::EndObject(elements_count);
|
||||
}
|
||||
|
||||
bool StartArray() {
|
||||
++_nested_level;
|
||||
check_nested_level();
|
||||
maybe_yield();
|
||||
return handler_base::StartArray();
|
||||
}
|
||||
|
||||
bool EndArray(rapidjson::SizeType elements_count = 0) {
|
||||
--_nested_level;
|
||||
return handler_base::EndArray(elements_count);
|
||||
}
|
||||
|
||||
bool Null() { maybe_yield(); return handler_base::Null(); }
|
||||
bool Bool(bool b) { maybe_yield(); return handler_base::Bool(b); }
|
||||
bool Int(int i) { maybe_yield(); return handler_base::Int(i); }
|
||||
bool Uint(unsigned u) { maybe_yield(); return handler_base::Uint(u); }
|
||||
bool Int64(int64_t i64) { maybe_yield(); return handler_base::Int64(i64); }
|
||||
bool Uint64(uint64_t u64) { maybe_yield(); return handler_base::Uint64(u64); }
|
||||
bool Double(double d) { maybe_yield(); return handler_base::Double(d); }
|
||||
bool String(const value::Ch* str, size_t length, bool copy = false) { maybe_yield(); return handler_base::String(str, length, copy); }
|
||||
bool Key(const value::Ch* str, size_t length, bool copy = false) { maybe_yield(); return handler_base::Key(str, length, copy); }
|
||||
|
||||
|
||||
protected:
|
||||
static void maybe_yield() {
|
||||
if constexpr (EnableYield) {
|
||||
thread::maybe_yield();
|
||||
}
|
||||
}
|
||||
|
||||
void check_nested_level() const {
|
||||
if (RAPIDJSON_UNLIKELY(_nested_level > _max_nested_level)) {
|
||||
throw rjson::error(format("Max nested level reached: {}", _max_nested_level));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::string print(const rjson::value& value) {
|
||||
string_buffer buffer;
|
||||
guarded_yieldable_json_handler<writer, false> writer(buffer, 39);
|
||||
writer writer(buffer);
|
||||
value.Accept(writer);
|
||||
return std::string(buffer.GetString());
|
||||
}
|
||||
@@ -132,9 +38,13 @@ rjson::value copy(const rjson::value& value) {
|
||||
return rjson::value(value, the_allocator);
|
||||
}
|
||||
|
||||
rjson::value parse(std::string_view str) {
|
||||
guarded_yieldable_json_handler<document, false> d(39);
|
||||
d.Parse(str.data(), str.size());
|
||||
rjson::value parse(const std::string& str) {
|
||||
return parse_raw(str.c_str(), str.size());
|
||||
}
|
||||
|
||||
rjson::value parse_raw(const char* c_str, size_t size) {
|
||||
rjson::document d;
|
||||
d.Parse(c_str, size);
|
||||
if (d.HasParseError()) {
|
||||
throw rjson::error(format("Parsing JSON failed: {}", GetParseError_En(d.GetParseError())));
|
||||
}
|
||||
@@ -142,22 +52,8 @@ rjson::value parse(std::string_view str) {
|
||||
return std::move(v);
|
||||
}
|
||||
|
||||
rjson::value parse_yieldable(std::string_view str) {
|
||||
guarded_yieldable_json_handler<document, true> d(39);
|
||||
d.Parse(str.data(), str.size());
|
||||
if (d.HasParseError()) {
|
||||
throw rjson::error(format("Parsing JSON failed: {}", GetParseError_En(d.GetParseError())));
|
||||
}
|
||||
rjson::value& v = d;
|
||||
return std::move(v);
|
||||
}
|
||||
|
||||
rjson::value& get(rjson::value& value, std::string_view name) {
|
||||
// Although FindMember() has a variant taking a StringRef, it ignores the
|
||||
// given length (see https://github.com/Tencent/rapidjson/issues/1649).
|
||||
// Luckily, the variant taking a GenericValue doesn't share this bug,
|
||||
// and we can create a string GenericValue without copying the string.
|
||||
auto member_it = value.FindMember(rjson::value(name.data(), name.size()));
|
||||
rjson::value& get(rjson::value& value, rjson::string_ref_type name) {
|
||||
auto member_it = value.FindMember(name);
|
||||
if (member_it != value.MemberEnd())
|
||||
return member_it->value;
|
||||
else {
|
||||
@@ -165,8 +61,8 @@ rjson::value& get(rjson::value& value, std::string_view name) {
|
||||
}
|
||||
}
|
||||
|
||||
const rjson::value& get(const rjson::value& value, std::string_view name) {
|
||||
auto member_it = value.FindMember(rjson::value(name.data(), name.size()));
|
||||
const rjson::value& get(const rjson::value& value, rjson::string_ref_type name) {
|
||||
auto member_it = value.FindMember(name);
|
||||
if (member_it != value.MemberEnd())
|
||||
return member_it->value;
|
||||
else {
|
||||
@@ -186,48 +82,24 @@ rjson::value from_string(const char* str, size_t size) {
|
||||
return rjson::value(str, size, the_allocator);
|
||||
}
|
||||
|
||||
rjson::value from_string(std::string_view view) {
|
||||
return rjson::value(view.data(), view.size(), the_allocator);
|
||||
}
|
||||
|
||||
const rjson::value* find(const rjson::value& value, std::string_view name) {
|
||||
// Although FindMember() has a variant taking a StringRef, it ignores the
|
||||
// given length (see https://github.com/Tencent/rapidjson/issues/1649).
|
||||
// Luckily, the variant taking a GenericValue doesn't share this bug,
|
||||
// and we can create a string GenericValue without copying the string.
|
||||
auto member_it = value.FindMember(rjson::value(name.data(), name.size()));
|
||||
const rjson::value* find(const rjson::value& value, string_ref_type name) {
|
||||
auto member_it = value.FindMember(name);
|
||||
return member_it != value.MemberEnd() ? &member_it->value : nullptr;
|
||||
}
|
||||
|
||||
rjson::value* find(rjson::value& value, std::string_view name) {
|
||||
auto member_it = value.FindMember(rjson::value(name.data(), name.size()));
|
||||
rjson::value* find(rjson::value& value, string_ref_type name) {
|
||||
auto member_it = value.FindMember(name);
|
||||
return member_it != value.MemberEnd() ? &member_it->value : nullptr;
|
||||
}
|
||||
|
||||
bool remove_member(rjson::value& value, std::string_view name) {
|
||||
// Although RemoveMember() has a variant taking a StringRef, it ignores the
|
||||
// given length (see https://github.com/Tencent/rapidjson/issues/1649).
|
||||
// Luckily, the variant taking a GenericValue doesn't share this bug,
|
||||
// and we can create a string GenericValue without copying the string.
|
||||
return value.RemoveMember(rjson::value(name.data(), name.size()));
|
||||
}
|
||||
|
||||
void set_with_string_name(rjson::value& base, const std::string& name, rjson::value&& member) {
|
||||
base.AddMember(rjson::value(name.c_str(), name.size(), the_allocator), std::move(member), the_allocator);
|
||||
}
|
||||
|
||||
void set_with_string_name(rjson::value& base, std::string_view name, rjson::value&& member) {
|
||||
base.AddMember(rjson::value(name.data(), name.size(), the_allocator), std::move(member), the_allocator);
|
||||
}
|
||||
|
||||
void set_with_string_name(rjson::value& base, const std::string& name, rjson::string_ref_type member) {
|
||||
base.AddMember(rjson::value(name.c_str(), name.size(), the_allocator), rjson::value(member), the_allocator);
|
||||
}
|
||||
|
||||
void set_with_string_name(rjson::value& base, std::string_view name, rjson::string_ref_type member) {
|
||||
base.AddMember(rjson::value(name.data(), name.size(), the_allocator), rjson::value(member), the_allocator);
|
||||
}
|
||||
|
||||
void set(rjson::value& base, rjson::string_ref_type name, rjson::value&& member) {
|
||||
base.AddMember(name, std::move(member), the_allocator);
|
||||
}
|
||||
|
||||
@@ -104,49 +104,38 @@ inline rjson::value empty_string() {
|
||||
// The representation is dense - without any redundant indentation.
|
||||
std::string print(const rjson::value& value);
|
||||
|
||||
// Returns a string_view to the string held in a JSON value (which is
|
||||
// assumed to hold a string, i.e., v.IsString() == true). This is a view
|
||||
// to the existing data - no copying is done.
|
||||
inline std::string_view to_string_view(const rjson::value& v) {
|
||||
return std::string_view(v.GetString(), v.GetStringLength());
|
||||
}
|
||||
|
||||
// Copies given JSON value - involves allocation
|
||||
rjson::value copy(const rjson::value& value);
|
||||
|
||||
// Parses a JSON value from given string or raw character array.
|
||||
// The string/char array liveness does not need to be persisted,
|
||||
// as parse() will allocate member names and values.
|
||||
// as both parse() and parse_raw() will allocate member names and values.
|
||||
// Throws rjson::error if parsing failed.
|
||||
rjson::value parse(std::string_view str);
|
||||
// Needs to be run in thread context
|
||||
rjson::value parse_yieldable(std::string_view str);
|
||||
rjson::value parse(const std::string& str);
|
||||
rjson::value parse_raw(const char* c_str, size_t size);
|
||||
|
||||
// Creates a JSON value (of JSON string type) out of internal string representations.
|
||||
// The string value is copied, so str's liveness does not need to be persisted.
|
||||
rjson::value from_string(const std::string& str);
|
||||
rjson::value from_string(const sstring& str);
|
||||
rjson::value from_string(const char* str, size_t size);
|
||||
rjson::value from_string(std::string_view view);
|
||||
|
||||
// Returns a pointer to JSON member if it exists, nullptr otherwise
|
||||
rjson::value* find(rjson::value& value, std::string_view name);
|
||||
const rjson::value* find(const rjson::value& value, std::string_view name);
|
||||
rjson::value* find(rjson::value& value, rjson::string_ref_type name);
|
||||
const rjson::value* find(const rjson::value& value, rjson::string_ref_type name);
|
||||
|
||||
// Returns a reference to JSON member if it exists, throws otherwise
|
||||
rjson::value& get(rjson::value& value, std::string_view name);
|
||||
const rjson::value& get(const rjson::value& value, std::string_view name);
|
||||
rjson::value& get(rjson::value& value, rjson::string_ref_type name);
|
||||
const rjson::value& get(const rjson::value& value, rjson::string_ref_type name);
|
||||
|
||||
// Sets a member in given JSON object by moving the member - allocates the name.
|
||||
// Throws if base is not a JSON object.
|
||||
void set_with_string_name(rjson::value& base, const std::string& name, rjson::value&& member);
|
||||
void set_with_string_name(rjson::value& base, std::string_view name, rjson::value&& member);
|
||||
|
||||
// Sets a string member in given JSON object by assigning its reference - allocates the name.
|
||||
// NOTICE: member string liveness must be ensured to be at least as long as base's.
|
||||
// Throws if base is not a JSON object.
|
||||
void set_with_string_name(rjson::value& base, const std::string& name, rjson::string_ref_type member);
|
||||
void set_with_string_name(rjson::value& base, std::string_view name, rjson::string_ref_type member);
|
||||
|
||||
// Sets a member in given JSON object by moving the member.
|
||||
// NOTICE: name liveness must be ensured to be at least as long as base's.
|
||||
@@ -163,9 +152,6 @@ void set(rjson::value& base, rjson::string_ref_type name, rjson::string_ref_type
|
||||
// Throws if base_array is not a JSON array.
|
||||
void push_back(rjson::value& base_array, rjson::value&& item);
|
||||
|
||||
// Remove a member from a JSON object. Throws if value isn't an object.
|
||||
bool remove_member(rjson::value& value, std::string_view name);
|
||||
|
||||
struct single_value_comp {
|
||||
bool operator()(const rjson::value& r1, const rjson::value& r2) const;
|
||||
};
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
/*
|
||||
* Copyright 2020 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <seastarx.hh>
|
||||
#include <service/storage_proxy.hh>
|
||||
#include <service/storage_proxy.hh>
|
||||
#include "rjson.hh"
|
||||
#include "executor.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
// An rmw_operation encapsulates the common logic of all the item update
|
||||
// operations which may involve a read of the item before the write
|
||||
// (so-called Read-Modify-Write operations). These operations include PutItem,
|
||||
// UpdateItem and DeleteItem: All of these may be conditional operations (the
|
||||
// "Expected" parameter) which require a read before the write, and UpdateItem
|
||||
// may also have an update expression which refers to the item's old value.
|
||||
//
|
||||
// The code below supports running the read and the write together as one
|
||||
// transaction using LWT (this is why rmw_operation is a subclass of
|
||||
// cas_request, as required by storage_proxy::cas()), but also has optional
|
||||
// modes not using LWT.
|
||||
class rmw_operation : public service::cas_request, public enable_shared_from_this<rmw_operation> {
|
||||
public:
|
||||
// The following options choose which mechanism to use for isolating
|
||||
// parallel write operations:
|
||||
// * The FORBID_RMW option forbids RMW (read-modify-write) operations
|
||||
// such as conditional updates. For the remaining write-only
|
||||
// operations, ordinary quorum writes are isolated enough.
|
||||
// * The LWT_ALWAYS option always uses LWT (lightweight transactions)
|
||||
// for any write operation - whether or not it also has a read.
|
||||
// * The LWT_RMW_ONLY option uses LWT only for RMW operations, and uses
|
||||
// ordinary quorum writes for write-only operations.
|
||||
// This option is not safe if the user may send both RMW and write-only
|
||||
// operations on the same item.
|
||||
// * The UNSAFE_RMW option does read-modify-write operations as separate
|
||||
// read and write. It is unsafe - concurrent RMW operations are not
|
||||
// isolated at all. This option will likely be removed in the future.
|
||||
enum class write_isolation {
|
||||
FORBID_RMW, LWT_ALWAYS, LWT_RMW_ONLY, UNSAFE_RMW
|
||||
};
|
||||
static constexpr auto WRITE_ISOLATION_TAG_KEY = "system:write_isolation";
|
||||
|
||||
static write_isolation get_write_isolation_for_schema(schema_ptr schema);
|
||||
|
||||
protected:
|
||||
// The full request JSON
|
||||
rjson::value _request;
|
||||
// All RMW operations involve a single item with a specific partition
|
||||
// and optional clustering key, in a single table, so the following
|
||||
// information is common to all of them:
|
||||
schema_ptr _schema;
|
||||
partition_key _pk = partition_key::make_empty();
|
||||
clustering_key _ck = clustering_key::make_empty();
|
||||
write_isolation _write_isolation;
|
||||
|
||||
// All RMW operations can have a ReturnValues parameter from the following
|
||||
// choices. But note that only UpdateItem actually supports all of them:
|
||||
enum class returnvalues {
|
||||
NONE, ALL_OLD, UPDATED_OLD, ALL_NEW, UPDATED_NEW
|
||||
} _returnvalues;
|
||||
static returnvalues parse_returnvalues(const rjson::value& request);
|
||||
// When _returnvalues != NONE, apply() should store here, in JSON form,
|
||||
// the values which are to be returned in the "Attributes" field.
|
||||
// The default null JSON means do not return an Attributes field at all.
|
||||
// This field is marked "mutable" so that the const apply() can modify
|
||||
// it (see explanation below), but note that because apply() may be
|
||||
// called more than once, if apply() will sometimes set this field it
|
||||
// must set it (even if just to the default empty value) every time.
|
||||
mutable rjson::value _return_attributes;
|
||||
public:
|
||||
// The constructor of a rmw_operation subclass should parse the request
|
||||
// and try to discover as many input errors as it can before really
|
||||
// attempting the read or write operations.
|
||||
rmw_operation(service::storage_proxy& proxy, rjson::value&& request);
|
||||
// rmw_operation subclasses (update_item_operation, put_item_operation
|
||||
// and delete_item_operation) shall implement an apply() function which
|
||||
// takes the previous value of the item (if it was read) and creates the
|
||||
// write mutation. If the previous value of item does not pass the needed
|
||||
// conditional expression, apply() should return an empty optional.
|
||||
// apply() may throw if it encounters input errors not discovered during
|
||||
// the constructor.
|
||||
// apply() may be called more than once in case of contention, so it must
|
||||
// not change the state saved in the object (issue #7218 was caused by
|
||||
// violating this). We mark apply() "const" to let the compiler validate
|
||||
// this for us. The output-only field _return_attributes is marked
|
||||
// "mutable" above so that apply() can still write to it.
|
||||
virtual std::optional<mutation> apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const = 0;
|
||||
// Convert the above apply() into the signature needed by cas_request:
|
||||
virtual std::optional<mutation> apply(query::result& qr, const query::partition_slice& slice, api::timestamp_type ts) override;
|
||||
virtual ~rmw_operation() = default;
|
||||
schema_ptr schema() const { return _schema; }
|
||||
const rjson::value& request() const { return _request; }
|
||||
rjson::value&& move_request() && { return std::move(_request); }
|
||||
future<executor::request_return_type> execute(service::storage_proxy& proxy,
|
||||
service::client_state& client_state,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
service_permit permit,
|
||||
bool needs_read_before_write,
|
||||
stats& stats);
|
||||
std::optional<shard_id> shard_for_execute(bool needs_read_before_write);
|
||||
};
|
||||
|
||||
} // namespace alternator
|
||||
@@ -136,7 +136,7 @@ rjson::value deserialize_item(bytes_view bv) {
|
||||
|
||||
if (atype == alternator_type::NOT_SUPPORTED_YET) {
|
||||
slogger.trace("Non-optimal deserialization of alternator type {}", int8_t(atype));
|
||||
return rjson::parse(std::string_view(reinterpret_cast<const char *>(bv.data()), bv.size()));
|
||||
return rjson::parse_raw(reinterpret_cast<const char *>(bv.data()), bv.size());
|
||||
}
|
||||
type_representation type_representation = represent_type(atype);
|
||||
visit(*type_representation.dtype, to_json_visitor{deserialized, type_representation.ident, bv});
|
||||
@@ -160,34 +160,27 @@ std::string type_to_string(data_type type) {
|
||||
|
||||
bytes get_key_column_value(const rjson::value& item, const column_definition& column) {
|
||||
std::string column_name = column.name_as_text();
|
||||
const rjson::value* key_typed_value = rjson::find(item, column_name);
|
||||
if (!key_typed_value) {
|
||||
throw api_error("ValidationException", format("Key column {} not found", column_name));
|
||||
std::string expected_type = type_to_string(column.type);
|
||||
|
||||
const rjson::value& key_typed_value = rjson::get(item, rjson::value::StringRefType(column_name.c_str()));
|
||||
if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1) {
|
||||
throw api_error("ValidationException",
|
||||
format("Missing or invalid value object for key column {}: {}", column_name, item));
|
||||
}
|
||||
return get_key_from_typed_value(*key_typed_value, column);
|
||||
return get_key_from_typed_value(key_typed_value, column, expected_type);
|
||||
}
|
||||
|
||||
// Parses the JSON encoding for a key value, which is a map with a single
|
||||
// entry, whose key is the type (expected to match the key column's type)
|
||||
// and the value is the encoded value.
|
||||
bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column_definition& column) {
|
||||
if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1 ||
|
||||
!key_typed_value.MemberBegin()->value.IsString()) {
|
||||
throw api_error("ValidationException",
|
||||
format("Malformed value object for key column {}: {}",
|
||||
column.name_as_text(), key_typed_value));
|
||||
}
|
||||
|
||||
bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column_definition& column, const std::string& expected_type) {
|
||||
auto it = key_typed_value.MemberBegin();
|
||||
if (it->name != type_to_string(column.type)) {
|
||||
if (it->name.GetString() != expected_type) {
|
||||
throw api_error("ValidationException",
|
||||
format("Type mismatch: expected type {} for key column {}, got type {}",
|
||||
type_to_string(column.type), column.name_as_text(), it->name.GetString()));
|
||||
expected_type, column.name_as_text(), it->name.GetString()));
|
||||
}
|
||||
if (column.type == bytes_type) {
|
||||
return base64_decode(it->value);
|
||||
} else {
|
||||
return column.type->from_string(rjson::to_string_view(it->value));
|
||||
return column.type->from_string(it->value.GetString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include "types.hh"
|
||||
#include "schema_fwd.hh"
|
||||
#include "schema.hh"
|
||||
#include "keys.hh"
|
||||
#include "rjson.hh"
|
||||
#include "utils/big_decimal.hh"
|
||||
@@ -54,7 +54,7 @@ rjson::value deserialize_item(bytes_view bv);
|
||||
std::string type_to_string(data_type type);
|
||||
|
||||
bytes get_key_column_value(const rjson::value& item, const column_definition& column);
|
||||
bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column_definition& column);
|
||||
bytes get_key_from_typed_value(const rjson::value& key_typed_value, const column_definition& column, const std::string& expected_type);
|
||||
rjson::value json_key_column_value(bytes_view cell, const column_definition& column);
|
||||
|
||||
partition_key pk_from_json(const rjson::value& item, schema_ptr schema);
|
||||
|
||||
@@ -29,8 +29,6 @@
|
||||
#include "auth.hh"
|
||||
#include <cctype>
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "utils/overloaded_functor.hh"
|
||||
|
||||
static logging::logger slogger("alternator-server");
|
||||
|
||||
@@ -67,9 +65,9 @@ inline std::vector<std::string_view> split(std::string_view text, char separator
|
||||
// Internal Server Error.
|
||||
class api_handler : public handler_base {
|
||||
public:
|
||||
api_handler(const std::function<future<executor::request_return_type>(std::unique_ptr<request> req)>& _handle) : _f_handle(
|
||||
[this, _handle](std::unique_ptr<request> req, std::unique_ptr<reply> rep) {
|
||||
return seastar::futurize_apply(_handle, std::move(req)).then_wrapped([this, rep = std::move(rep)](future<executor::request_return_type> resf) mutable {
|
||||
api_handler(const future_json_function& _handle) : _f_handle(
|
||||
[_handle](std::unique_ptr<request> req, std::unique_ptr<reply> rep) {
|
||||
return seastar::futurize_apply(_handle, std::move(req)).then_wrapped([rep = std::move(rep)](future<json::json_return_type> resf) mutable {
|
||||
if (resf.failed()) {
|
||||
// Exceptions of type api_error are wrapped as JSON and
|
||||
// returned to the client as expected. Other types of
|
||||
@@ -88,24 +86,20 @@ public:
|
||||
format("Internal server error: {}", std::current_exception()),
|
||||
reply::status_type::internal_server_error);
|
||||
}
|
||||
generate_error_reply(*rep, ret);
|
||||
// FIXME: what is this version number?
|
||||
rep->_content += "{\"__type\":\"com.amazonaws.dynamodb.v20120810#" + ret._type + "\"," +
|
||||
"\"message\":\"" + ret._msg + "\"}";
|
||||
rep->_status = ret._http_code;
|
||||
slogger.trace("api_handler error case: {}", rep->_content);
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
}
|
||||
slogger.trace("api_handler success case");
|
||||
auto res = resf.get0();
|
||||
std::visit(overloaded_functor {
|
||||
[&] (const json::json_return_type& json_return_value) {
|
||||
slogger.trace("api_handler success case");
|
||||
if (json_return_value._body_writer) {
|
||||
rep->write_body("json", std::move(json_return_value._body_writer));
|
||||
} else {
|
||||
rep->_content += json_return_value._res;
|
||||
}
|
||||
},
|
||||
[&] (const api_error& err) {
|
||||
generate_error_reply(*rep, err);
|
||||
}
|
||||
}, res);
|
||||
|
||||
if (res._body_writer) {
|
||||
rep->write_body("json", std::move(res._body_writer));
|
||||
} else {
|
||||
rep->_content += res._res;
|
||||
}
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
});
|
||||
}), _type("json") { }
|
||||
@@ -121,66 +115,18 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
void generate_error_reply(reply& rep, const api_error& err) {
|
||||
rep._content += "{\"__type\":\"com.amazonaws.dynamodb.v20120810#" + err._type + "\"," +
|
||||
"\"message\":\"" + err._msg + "\"}";
|
||||
rep._status = err._http_code;
|
||||
slogger.trace("api_handler error case: {}", rep._content);
|
||||
}
|
||||
|
||||
future_handler_function _f_handle;
|
||||
sstring _type;
|
||||
};
|
||||
|
||||
class gated_handler : public handler_base {
|
||||
seastar::gate& _gate;
|
||||
public:
|
||||
gated_handler(seastar::gate& gate) : _gate(gate) {}
|
||||
virtual future<std::unique_ptr<reply>> do_handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) = 0;
|
||||
virtual future<std::unique_ptr<reply>> handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) final override {
|
||||
return with_gate(_gate, [this, &path, req = std::move(req), rep = std::move(rep)] () mutable {
|
||||
return do_handle(path, std::move(req), std::move(rep));
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
class health_handler : public gated_handler {
|
||||
public:
|
||||
health_handler(seastar::gate& pending_requests) : gated_handler(pending_requests) {}
|
||||
protected:
|
||||
virtual future<std::unique_ptr<reply>> do_handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
|
||||
class health_handler : public handler_base {
|
||||
virtual future<std::unique_ptr<reply>> handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
|
||||
rep->set_status(reply::status_type::ok);
|
||||
rep->write_body("txt", format("healthy: {}", req->get_header("Host")));
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
}
|
||||
};
|
||||
|
||||
class local_nodelist_handler : public gated_handler {
|
||||
public:
|
||||
local_nodelist_handler(seastar::gate& pending_requests) : gated_handler(pending_requests) {}
|
||||
protected:
|
||||
virtual future<std::unique_ptr<reply>> do_handle(const sstring& path, std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
|
||||
rjson::value results = rjson::empty_array();
|
||||
// It's very easy to get a list of all live nodes on the cluster,
|
||||
// using gms::get_local_gossiper().get_live_members(). But getting
|
||||
// just the list of live nodes in this DC needs more elaborate code:
|
||||
sstring local_dc = locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(
|
||||
utils::fb_utilities::get_broadcast_address());
|
||||
std::unordered_set<gms::inet_address> local_dc_nodes =
|
||||
service::get_local_storage_service().get_token_metadata().
|
||||
get_topology().get_datacenter_endpoints().at(local_dc);
|
||||
for (auto& ip : local_dc_nodes) {
|
||||
if (gms::get_local_gossiper().is_alive(ip)) {
|
||||
rjson::push_back(results, rjson::from_string(ip.to_sstring()));
|
||||
}
|
||||
}
|
||||
rep->set_status(reply::status_type::ok);
|
||||
rep->set_content_type("json");
|
||||
rep->_content = rjson::print(results);
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
}
|
||||
};
|
||||
|
||||
future<> server::verify_signature(const request& req) {
|
||||
if (!_enforce_authorization) {
|
||||
slogger.debug("Skipping authorization");
|
||||
@@ -191,7 +137,7 @@ future<> server::verify_signature(const request& req) {
|
||||
throw api_error("InvalidSignatureException", "Host header is mandatory for signature verification");
|
||||
}
|
||||
auto authorization_it = req._headers.find("Authorization");
|
||||
if (authorization_it == req._headers.end()) {
|
||||
if (host_it == req._headers.end()) {
|
||||
throw api_error("InvalidSignatureException", "Authorization header is mandatory for signature verification");
|
||||
}
|
||||
std::string host = host_it->second;
|
||||
@@ -268,8 +214,8 @@ future<> server::verify_signature(const request& req) {
|
||||
});
|
||||
}
|
||||
|
||||
future<executor::request_return_type> server::handle_api_request(std::unique_ptr<request>&& req) {
|
||||
_executor._stats.total_operations++;
|
||||
future<json::json_return_type> server::handle_api_request(std::unique_ptr<request>&& req) {
|
||||
_executor.local()._stats.total_operations++;
|
||||
sstring target = req->get_header(TARGET);
|
||||
std::vector<std::string_view> split_target = split(target, '.');
|
||||
//NOTICE(sarna): Target consists of Dynamo API version followed by a dot '.' and operation type (e.g. CreateTable)
|
||||
@@ -278,32 +224,17 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
||||
return verify_signature(*req).then([this, op, req = std::move(req)] () mutable {
|
||||
auto callback_it = _callbacks.find(op);
|
||||
if (callback_it == _callbacks.end()) {
|
||||
_executor._stats.unsupported_operations++;
|
||||
_executor.local()._stats.unsupported_operations++;
|
||||
throw api_error("UnknownOperationException",
|
||||
format("Unsupported operation {}", op));
|
||||
}
|
||||
return with_gate(_pending_requests, [this, callback_it = std::move(callback_it), op = std::move(op), req = std::move(req)] () mutable {
|
||||
//FIXME: Client state can provide more context, e.g. client's endpoint address
|
||||
// We use unique_ptr because client_state cannot be moved or copied
|
||||
return do_with(std::make_unique<executor::client_state>(executor::client_state::internal_tag()),
|
||||
[this, callback_it = std::move(callback_it), op = std::move(op), req = std::move(req)] (std::unique_ptr<executor::client_state>& client_state) mutable {
|
||||
tracing::trace_state_ptr trace_state = executor::maybe_trace_query(*client_state, op, req->content);
|
||||
tracing::trace(trace_state, op);
|
||||
// JSON parsing can allocate up to roughly 2x the size of the raw document, + a couple of bytes for maintenance.
|
||||
// FIXME: by this time, the whole HTTP request was already read, so some memory is already occupied.
|
||||
// Once HTTP allows working on streams, we should grab the permit *before* reading the HTTP payload.
|
||||
size_t mem_estimate = req->content.size() * 3 + 8000;
|
||||
auto units_fut = get_units(*_memory_limiter, mem_estimate);
|
||||
if (_memory_limiter->waiters()) {
|
||||
++_executor._stats.requests_blocked_memory;
|
||||
}
|
||||
return units_fut.then([this, callback_it = std::move(callback_it), &client_state, trace_state, req = std::move(req)] (semaphore_units<> units) mutable {
|
||||
return _json_parser.parse(req->content).then([this, callback_it = std::move(callback_it), &client_state, trace_state,
|
||||
units = std::move(units), req = std::move(req)] (rjson::value json_request) mutable {
|
||||
return callback_it->second(_executor, *client_state, trace_state, make_service_permit(std::move(units)), std::move(json_request), std::move(req)).finally([trace_state] {});
|
||||
});
|
||||
});
|
||||
});
|
||||
//FIXME: Client state can provide more context, e.g. client's endpoint address
|
||||
// We use unique_ptr because client_state cannot be moved or copied
|
||||
return do_with(std::make_unique<executor::client_state>(executor::client_state::internal_tag()), [this, callback_it = std::move(callback_it), op = std::move(op), req = std::move(req)] (std::unique_ptr<executor::client_state>& client_state) mutable {
|
||||
client_state->set_raw_keyspace(executor::KEYSPACE_NAME);
|
||||
tracing::trace_state_ptr trace_state = executor::maybe_trace_query(*client_state, op, req->content);
|
||||
tracing::trace(trace_state, op);
|
||||
return callback_it->second(_executor.local(), *client_state, trace_state, std::move(req)).finally([trace_state] {});
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -313,88 +244,35 @@ void server::set_routes(routes& r) {
|
||||
return handle_api_request(std::move(req));
|
||||
});
|
||||
|
||||
r.put(operation_type::POST, "/", req_handler);
|
||||
r.put(operation_type::GET, "/", new health_handler(_pending_requests));
|
||||
// The "/localnodes" request is a new Alternator feature, not supported by
|
||||
// DynamoDB and not required for DynamoDB compatibility. It allows a
|
||||
// client to enquire - using a trivial HTTP request without requiring
|
||||
// authentication - the list of all live nodes in the same data center of
|
||||
// the Alternator cluster. The client can use this list to balance its
|
||||
// request load to all the nodes in the same geographical region.
|
||||
// Note that this API exposes - openly without authentication - the
|
||||
// information on the cluster's members inside one data center. We do not
|
||||
// consider this to be a security risk, because an attacker can already
|
||||
// scan an entire subnet for nodes responding to the health request,
|
||||
// or even just scan for open ports.
|
||||
r.put(operation_type::GET, "/localnodes", new local_nodelist_handler(_pending_requests));
|
||||
r.add(operation_type::POST, url("/"), req_handler);
|
||||
r.add(operation_type::GET, url("/"), new health_handler);
|
||||
}
|
||||
|
||||
//FIXME: A way to immediately invalidate the cache should be considered,
|
||||
// e.g. when the system table which stores the keys is changed.
|
||||
// For now, this propagation may take up to 1 minute.
|
||||
server::server(executor& exec)
|
||||
: _http_server("http-alternator")
|
||||
, _https_server("https-alternator")
|
||||
, _executor(exec)
|
||||
, _key_cache(1024, 1min, slogger)
|
||||
, _enforce_authorization(false)
|
||||
, _enabled_servers{}
|
||||
, _pending_requests{}
|
||||
server::server(seastar::sharded<executor>& e)
|
||||
: _executor(e), _key_cache(1024, 1min, slogger), _enforce_authorization(false)
|
||||
, _callbacks{
|
||||
{"CreateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.create_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DescribeTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.describe_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DeleteTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.delete_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"PutItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.put_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"UpdateItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.update_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"GetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DeleteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.delete_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"ListTables", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.list_tables(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"Scan", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.scan(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DescribeEndpoints", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.describe_endpoints(client_state, std::move(permit), std::move(json_request), req->get_header("Host"));
|
||||
}},
|
||||
{"BatchWriteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.batch_write_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"BatchGetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.batch_get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"Query", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.query(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"TagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.tag_resource(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"UntagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.untag_resource(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"ListTagsOfResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.list_tags_of_resource(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"CreateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) {
|
||||
return e.maybe_create_keyspace().then([&e, &client_state, req = std::move(req), trace_state = std::move(trace_state)] () mutable { return e.create_table(client_state, std::move(trace_state), req->content); }); }
|
||||
},
|
||||
{"DescribeTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.describe_table(client_state, std::move(trace_state), req->content); }},
|
||||
{"DeleteTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.delete_table(client_state, std::move(trace_state), req->content); }},
|
||||
{"PutItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.put_item(client_state, std::move(trace_state), req->content); }},
|
||||
{"UpdateItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.update_item(client_state, std::move(trace_state), req->content); }},
|
||||
{"GetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.get_item(client_state, std::move(trace_state), req->content); }},
|
||||
{"DeleteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.delete_item(client_state, std::move(trace_state), req->content); }},
|
||||
{"ListTables", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.list_tables(client_state, req->content); }},
|
||||
{"Scan", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.scan(client_state, std::move(trace_state), req->content); }},
|
||||
{"DescribeEndpoints", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.describe_endpoints(client_state, req->content, req->get_header("Host")); }},
|
||||
{"BatchWriteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.batch_write_item(client_state, std::move(trace_state), req->content); }},
|
||||
{"BatchGetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.batch_get_item(client_state, std::move(trace_state), req->content); }},
|
||||
{"Query", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, std::unique_ptr<request> req) { return e.query(client_state, std::move(trace_state), req->content); }},
|
||||
} {
|
||||
}
|
||||
|
||||
future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
|
||||
bool enforce_authorization, semaphore* memory_limiter) {
|
||||
_memory_limiter = memory_limiter;
|
||||
future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds, bool enforce_authorization) {
|
||||
_enforce_authorization = enforce_authorization;
|
||||
if (!port && !https_port) {
|
||||
return make_exception_future<>(std::runtime_error("Either regular port or TLS port"
|
||||
@@ -402,21 +280,24 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
|
||||
}
|
||||
return seastar::async([this, addr, port, https_port, creds] {
|
||||
try {
|
||||
_executor.start().get();
|
||||
_executor.invoke_on_all([] (executor& e) {
|
||||
return e.start();
|
||||
}).get();
|
||||
|
||||
if (port) {
|
||||
set_routes(_http_server._routes);
|
||||
_http_server.set_content_length_limit(server::content_length_limit);
|
||||
_http_server.listen(socket_address{addr, *port}).get();
|
||||
_enabled_servers.push_back(std::ref(_http_server));
|
||||
_control.start().get();
|
||||
_control.set_routes(std::bind(&server::set_routes, this, std::placeholders::_1)).get();
|
||||
_control.listen(socket_address{addr, *port}).get();
|
||||
slogger.info("Alternator HTTP server listening on {} port {}", addr, *port);
|
||||
}
|
||||
if (https_port) {
|
||||
set_routes(_https_server._routes);
|
||||
_https_server.set_content_length_limit(server::content_length_limit);
|
||||
_https_server.set_tls_credentials(creds->build_server_credentials());
|
||||
_https_server.listen(socket_address{addr, *https_port}).get();
|
||||
_enabled_servers.push_back(std::ref(_https_server));
|
||||
_https_control.start().get();
|
||||
_https_control.set_routes(std::bind(&server::set_routes, this, std::placeholders::_1)).get();
|
||||
_https_control.server().invoke_on_all([creds] (http_server& serv) {
|
||||
return serv.set_tls_credentials(creds->build_server_credentials());
|
||||
}).get();
|
||||
|
||||
_https_control.listen(socket_address{addr, *https_port}).get();
|
||||
slogger.info("Alternator HTTPS server listening on {} port {}", addr, *https_port);
|
||||
}
|
||||
} catch (...) {
|
||||
@@ -429,55 +310,5 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
|
||||
});
|
||||
}
|
||||
|
||||
// Shuts the server down in three sequential phases:
//   1. stop every HTTP(S) server recorded in _enabled_servers (in parallel),
//   2. close the _pending_requests gate,
//   3. stop the JSON parser.
// The returned future resolves once the last phase completes.
future<> server::stop() {
    auto stop_one = [] (http_server& srv) {
        return srv.stop();
    };
    return parallel_for_each(_enabled_servers, std::move(stop_one)).then([this] {
        return _pending_requests.close();
    }).then([this] {
        return _json_parser.stop();
    });
}
|
||||
|
||||
// Starts the parsing loop via async() and keeps its future in
// _run_parse_json_thread. Each iteration:
//   - waits on _document_waiting,
//   - returns if an abort was requested on _as,
//   - parses _raw_document with rjson::parse_yieldable(), storing either the
//     result in _parsed_document or the thrown exception in _current_exception,
//   - signals _document_parsed so the requester can pick up the outcome.
server::json_parser::json_parser() : _run_parse_json_thread(async([this] {
        for (;;) {
            _document_waiting.wait().get();
            if (_as.abort_requested()) {
                return;
            }
            try {
                _parsed_document = rjson::parse_yieldable(_raw_document);
                // Clear any error left over from a previous document.
                _current_exception = nullptr;
            } catch (...) {
                _current_exception = std::current_exception();
            }
            _document_parsed.signal();
        }
    })) {
}
|
||||
|
||||
// Parses `content` into an rjson::value.
//
// Documents shorter than yieldable_parsing_threshold are parsed inline with
// rjson::parse(). Larger ones are handed to the parsing loop: while holding one
// unit of _parsing_sem, the view is published in _raw_document, the loop is
// woken through _document_waiting, and the result (or the stored exception) is
// collected after _document_parsed is signalled.
//
// NOTE(review): only a std::string_view is stored, so the caller presumably must
// keep the underlying buffer alive until the returned future resolves - confirm.
future<rjson::value> server::json_parser::parse(std::string_view content) {
    const bool parse_inline = content.size() < yieldable_parsing_threshold;
    if (parse_inline) {
        return make_ready_future<rjson::value>(rjson::parse(content));
    }
    auto hand_off = [this, content] {
        _raw_document = content;
        _document_waiting.signal();
        return _document_parsed.wait().then([this] {
            return _current_exception
                    ? make_exception_future<rjson::value>(_current_exception)
                    : make_ready_future<rjson::value>(std::move(_parsed_document));
        });
    };
    return with_semaphore(_parsing_sem, 1, std::move(hand_off));
}
|
||||
|
||||
// Stops the parsing loop and returns the future that tracks it.
future<> server::json_parser::stop() {
    // Flag the abort first, then wake the loop so it can observe the flag and return.
    _as.request_abort();
    _document_waiting.signal();
    // Mark the "parsed" condition variable as broken
    // (presumably so a parse() still waiting on it fails instead of hanging - confirm).
    _document_parsed.broken();
    auto loop_done = std::move(_run_parse_json_thread);
    return loop_done;
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -27,56 +27,27 @@
|
||||
#include <seastar/net/tls.hh>
|
||||
#include <optional>
|
||||
#include <alternator/auth.hh>
|
||||
#include <utils/small_vector.hh>
|
||||
#include <seastar/core/units.hh>
|
||||
|
||||
namespace alternator {
|
||||
|
||||
class server {
|
||||
static constexpr size_t content_length_limit = 16*MB;
|
||||
using alternator_callback = std::function<future<executor::request_return_type>(executor&, executor::client_state&,
|
||||
tracing::trace_state_ptr, service_permit, rjson::value, std::unique_ptr<request>)>;
|
||||
using alternator_callback = std::function<future<json::json_return_type>(executor&, executor::client_state&, tracing::trace_state_ptr, std::unique_ptr<request>)>;
|
||||
using alternator_callbacks_map = std::unordered_map<std::string_view, alternator_callback>;
|
||||
|
||||
http_server _http_server;
|
||||
http_server _https_server;
|
||||
executor& _executor;
|
||||
|
||||
seastar::httpd::http_server_control _control;
|
||||
seastar::httpd::http_server_control _https_control;
|
||||
seastar::sharded<executor>& _executor;
|
||||
key_cache _key_cache;
|
||||
bool _enforce_authorization;
|
||||
utils::small_vector<std::reference_wrapper<seastar::httpd::http_server>, 2> _enabled_servers;
|
||||
gate _pending_requests;
|
||||
alternator_callbacks_map _callbacks;
|
||||
|
||||
semaphore* _memory_limiter;
|
||||
|
||||
class json_parser {
|
||||
static constexpr size_t yieldable_parsing_threshold = 16*KB;
|
||||
std::string_view _raw_document;
|
||||
rjson::value _parsed_document;
|
||||
std::exception_ptr _current_exception;
|
||||
semaphore _parsing_sem{1};
|
||||
condition_variable _document_waiting;
|
||||
condition_variable _document_parsed;
|
||||
abort_source _as;
|
||||
future<> _run_parse_json_thread;
|
||||
public:
|
||||
json_parser();
|
||||
future<rjson::value> parse(std::string_view content);
|
||||
future<> stop();
|
||||
};
|
||||
json_parser _json_parser;
|
||||
|
||||
public:
|
||||
server(executor& executor);
|
||||
server(seastar::sharded<executor>& executor);
|
||||
|
||||
future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
|
||||
bool enforce_authorization, semaphore* memory_limiter);
|
||||
future<> stop();
|
||||
seastar::future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds, bool enforce_authorization);
|
||||
private:
|
||||
void set_routes(seastar::httpd::routes& r);
|
||||
future<> verify_signature(const seastar::httpd::request& r);
|
||||
future<executor::request_return_type> handle_api_request(std::unique_ptr<request>&& req);
|
||||
future<json::json_return_type> handle_api_request(std::unique_ptr<request>&& req);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -85,12 +85,6 @@ stats::stats() : api_operations{} {
|
||||
seastar::metrics::description("number of total operations via Alternator API")),
|
||||
seastar::metrics::make_total_operations("reads_before_write", reads_before_write,
|
||||
seastar::metrics::description("number of performed read-before-write operations")),
|
||||
seastar::metrics::make_total_operations("write_using_lwt", write_using_lwt,
|
||||
seastar::metrics::description("number of writes that used LWT")),
|
||||
seastar::metrics::make_total_operations("shard_bounce_for_lwt", shard_bounce_for_lwt,
|
||||
seastar::metrics::description("number writes that had to be bounced from this shard because of LWT requirements")),
|
||||
seastar::metrics::make_total_operations("requests_blocked_memory", requests_blocked_memory,
|
||||
seastar::metrics::description("Counts a number of requests blocked due to memory pressure.")),
|
||||
seastar::metrics::make_total_operations("filtered_rows_read_total", cql_stats.filtered_rows_read_total,
|
||||
seastar::metrics::description("number of rows read during filtering operations")),
|
||||
seastar::metrics::make_total_operations("filtered_rows_matched_total", cql_stats.filtered_rows_matched_total,
|
||||
|
||||
@@ -84,9 +84,6 @@ public:
|
||||
uint64_t total_operations = 0;
|
||||
uint64_t unsupported_operations = 0;
|
||||
uint64_t reads_before_write = 0;
|
||||
uint64_t write_using_lwt = 0;
|
||||
uint64_t shard_bounce_for_lwt = 0;
|
||||
uint64_t requests_blocked_memory = 0;
|
||||
// CQL-derived stats
|
||||
cql3::cql_stats cql_stats;
|
||||
private:
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
* Copyright 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "serializer.hh"
|
||||
#include "schema.hh"
|
||||
#include "db/extensions.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
class tags_extension : public schema_extension {
|
||||
public:
|
||||
static constexpr auto NAME = "scylla_tags";
|
||||
|
||||
tags_extension() = default;
|
||||
explicit tags_extension(const std::map<sstring, sstring>& tags) : _tags(std::move(tags)) {}
|
||||
explicit tags_extension(bytes b) : _tags(tags_extension::deserialize(b)) {}
|
||||
explicit tags_extension(const sstring& s) {
|
||||
throw std::logic_error("Cannot create tags from string");
|
||||
}
|
||||
bytes serialize() const override {
|
||||
return ser::serialize_to_buffer<bytes>(_tags);
|
||||
}
|
||||
static std::map<sstring, sstring> deserialize(bytes_view buffer) {
|
||||
return ser::deserialize_from_buffer(buffer, boost::type<std::map<sstring, sstring>>());
|
||||
}
|
||||
const std::map<sstring, sstring>& tags() const {
|
||||
return _tags;
|
||||
}
|
||||
private:
|
||||
std::map<sstring, sstring> _tags;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -70,7 +70,7 @@
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Force a major compaction of this column family",
|
||||
"type":"void",
|
||||
"type":"string",
|
||||
"nickname":"force_major_compaction",
|
||||
"produces":[
|
||||
"application/json"
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
{
|
||||
"apiVersion":"0.0.1",
|
||||
"swaggerVersion":"1.2",
|
||||
"basePath":"{{Protocol}}://{{Host}}",
|
||||
"resourcePath":"/error_injection",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"apis":[
|
||||
{
|
||||
"path":"/v2/error_injection/injection/{injection}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Activate an injection that triggers an error in code",
|
||||
"type":"void",
|
||||
"nickname":"enable_injection",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"injection",
|
||||
"description":"injection name, should correspond to an injection added in code",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
},
|
||||
{
|
||||
"name":"one_shot",
|
||||
"description":"boolean flag indicating whether the injection should be enabled to trigger only once",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"boolean",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"method":"DELETE",
|
||||
"summary":"Deactivate an injection previously activated by the API",
|
||||
"type":"void",
|
||||
"nickname":"disable_injection",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"injection",
|
||||
"description":"injection name",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/v2/error_injection/injection",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"List all enabled injections on all shards, i.e. injections that will trigger an error in the code",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"string"
|
||||
},
|
||||
"nickname":"get_enabled_injections_on_all",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[]
|
||||
},
|
||||
{
|
||||
"method":"DELETE",
|
||||
"summary":"Deactivate all injections previously activated on all shards by the API",
|
||||
"type":"void",
|
||||
"nickname":"disable_on_all",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -641,21 +641,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path": "/storage_proxy/metrics/cas_write/failed_read_round_optimization",
|
||||
"operations": [
|
||||
{
|
||||
"method": "GET",
|
||||
"summary": "Get cas write metrics",
|
||||
"type": "long",
|
||||
"nickname": "get_cas_write_metrics_failed_read_round_optimization",
|
||||
"produces": [
|
||||
"application/json"
|
||||
],
|
||||
"parameters": []
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path": "/storage_proxy/metrics/cas_read/unfinished_commit",
|
||||
"operations": [
|
||||
|
||||
@@ -582,15 +582,7 @@
|
||||
},
|
||||
{
|
||||
"name":"kn",
|
||||
"description":"Comma seperated keyspaces name that their snapshot will be deleted",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"cf",
|
||||
"description":"an optional table name that its snapshot will be deleted",
|
||||
"description":"Comma seperated keyspaces name to snapshot",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
|
||||
16
api/api.cc
16
api/api.cc
@@ -36,7 +36,6 @@
|
||||
#include "endpoint_snitch.hh"
|
||||
#include "compaction_manager.hh"
|
||||
#include "hinted_handoff.hh"
|
||||
#include "error_injection.hh"
|
||||
#include <seastar/http/exception.hh>
|
||||
#include "stream_manager.hh"
|
||||
#include "system.hh"
|
||||
@@ -69,19 +68,13 @@ future<> set_server_init(http_context& ctx) {
|
||||
rb->set_api_doc(r);
|
||||
rb02->set_api_doc(r);
|
||||
rb02->register_api_file(r, "swagger20_header");
|
||||
set_config(rb02, ctx, r);
|
||||
rb->register_function(r, "system",
|
||||
"The system related API");
|
||||
set_system(ctx, r);
|
||||
});
|
||||
}
|
||||
|
||||
// Installs the config routes on the HTTP server, using a v2 API registry
// builder rooted at "/v2".
future<> set_server_config(http_context& ctx) {
    auto v2_builder = std::make_shared<api_registry_builder20>(ctx.api_doc, "/v2");
    auto install = [&ctx, v2_builder] (routes& r) {
        set_config(v2_builder, ctx, r);
    };
    return ctx.http_server.set_routes(std::move(install));
}
|
||||
|
||||
static future<> register_api(http_context& ctx, const sstring& api_name,
|
||||
const sstring api_desc,
|
||||
std::function<void(http_context& ctx, routes& r)> f) {
|
||||
@@ -97,10 +90,6 @@ future<> set_server_storage_service(http_context& ctx) {
|
||||
return register_api(ctx, "storage_service", "The storage service API", set_storage_service);
|
||||
}
|
||||
|
||||
// Installs the snapshot routes on the HTTP server.
future<> set_server_snapshot(http_context& ctx) {
    auto install = [&ctx] (routes& r) {
        set_snapshot(ctx, r);
    };
    return ctx.http_server.set_routes(std::move(install));
}
|
||||
|
||||
future<> set_server_snitch(http_context& ctx) {
|
||||
return register_api(ctx, "endpoint_snitch_info", "The endpoint snitch info API", set_endpoint_snitch);
|
||||
}
|
||||
@@ -164,9 +153,6 @@ future<> set_server_done(http_context& ctx) {
|
||||
rb->register_function(r, "collectd",
|
||||
"The collectd API");
|
||||
set_collectd(ctx, r);
|
||||
rb->register_function(r, "error_injection",
|
||||
"The error injection API");
|
||||
set_error_injection(ctx, r);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include <seastar/http/httpd.hh>
|
||||
|
||||
namespace service { class load_meter; }
|
||||
namespace locator { class token_metadata; }
|
||||
|
||||
namespace api {
|
||||
|
||||
@@ -35,20 +34,16 @@ struct http_context {
|
||||
distributed<database>& db;
|
||||
distributed<service::storage_proxy>& sp;
|
||||
service::load_meter& lmeter;
|
||||
sharded<locator::token_metadata>& token_metadata;
|
||||
|
||||
http_context(distributed<database>& _db,
|
||||
distributed<service::storage_proxy>& _sp,
|
||||
service::load_meter& _lm, sharded<locator::token_metadata>& _tm)
|
||||
: db(_db), sp(_sp), lmeter(_lm), token_metadata(_tm) {
|
||||
service::load_meter& _lm)
|
||||
: db(_db), sp(_sp), lmeter(_lm) {
|
||||
}
|
||||
};
|
||||
|
||||
future<> set_server_init(http_context& ctx);
|
||||
future<> set_server_config(http_context& ctx);
|
||||
future<> set_server_snitch(http_context& ctx);
|
||||
future<> set_server_storage_service(http_context& ctx);
|
||||
future<> set_server_snapshot(http_context& ctx);
|
||||
future<> set_server_gossip(http_context& ctx);
|
||||
future<> set_server_load_sstable(http_context& ctx);
|
||||
future<> set_server_messaging_service(http_context& ctx);
|
||||
|
||||
@@ -64,7 +64,7 @@ static const char* str_to_regex(const sstring& v) {
|
||||
void set_collectd(http_context& ctx, routes& r) {
|
||||
cd::get_collectd.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
|
||||
auto id = ::make_shared<scollectd::type_instance_id>(req->param["pluginid"],
|
||||
auto id = make_shared<scollectd::type_instance_id>(req->param["pluginid"],
|
||||
req->get_query_param("instance"), req->get_query_param("type"),
|
||||
req->get_query_param("type_instance"));
|
||||
|
||||
|
||||
@@ -994,15 +994,5 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
});
|
||||
});
|
||||
|
||||
cf::force_major_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
if (req->get_query_param("split_output") != "") {
|
||||
fail(unimplemented::cause::API);
|
||||
}
|
||||
return foreach_column_family(ctx, req->param["name"], [](column_family &cf) {
|
||||
return cf.compact_all_sstables();
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "api/api-doc/error_injection.json.hh"
|
||||
#include "api/api.hh"
|
||||
|
||||
#include <seastar/http/exception.hh>
|
||||
#include "log.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "seastar/core/future-util.hh"
|
||||
|
||||
namespace api {
|
||||
|
||||
namespace hf = httpd::error_injection_json;
|
||||
|
||||
// Registers the error-injection REST handlers on `r`.
// `ctx` is not used by any of the handlers below.
void set_error_injection(http_context& ctx, routes& r) {

    // Enable the injection named by the "injection" path parameter.
    hf::enable_injection.set(r, [](std::unique_ptr<request> req) {
        sstring name = req->param["injection"];
        // Only the exact string "True" turns on one-shot mode.
        bool once = req->get_query_param("one_shot") == "True";
        // NOTE(review): whatever enable_on_all() returns is ignored here - confirm
        // that it does not need to be waited for before replying.
        utils::get_local_injector().enable_on_all(name, once);
        return make_ready_future<json::json_return_type>(json::json_void());
    });

    // Report the enabled injections.
    hf::get_enabled_injections_on_all.set(r, [](std::unique_ptr<request> req) {
        auto enabled = utils::get_local_injector().enabled_injections_on_all();
        return make_ready_future<json::json_return_type>(enabled);
    });

    // Disable a single injection, again named by the "injection" path parameter.
    hf::disable_injection.set(r, [](std::unique_ptr<request> req) {
        sstring name = req->param["injection"];

        // NOTE(review): the result of disable_on_all() is ignored as well - confirm.
        utils::get_local_injector().disable_on_all(name);
        return make_ready_future<json::json_return_type>(json::json_void());
    });

    // Disable every injection.
    hf::disable_on_all.set(r, [](std::unique_ptr<request> req) {
        utils::get_local_injector().disable_on_all();
        return make_ready_future<json::json_return_type>(json::json_void());
    });

}
|
||||
|
||||
} // namespace api
|
||||
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "api.hh"
|
||||
|
||||
namespace api {
|
||||
|
||||
void set_error_injection(http_context& ctx, routes& r);
|
||||
|
||||
}
|
||||
@@ -27,7 +27,6 @@
|
||||
#include "db/config.hh"
|
||||
#include "utils/histogram.hh"
|
||||
#include "database.hh"
|
||||
#include "seastar/core/scheduling_specific.hh"
|
||||
|
||||
namespace api {
|
||||
|
||||
@@ -35,70 +34,12 @@ namespace sp = httpd::storage_proxy_json;
|
||||
using proxy = service::storage_proxy;
|
||||
using namespace json;
|
||||
|
||||
|
||||
/**
|
||||
* This function implements a two-dimensional map reduce where
|
||||
* the first level is a distributed storage_proxy class and the
|
||||
* second level is the stats per scheduling group class.
|
||||
* @param d - a reference to the storage_proxy distributed class.
|
||||
* @param mapper - the internal mapper that is used to map the internal
|
||||
* stat class into a value of type `V`.
|
||||
* @param reducer - the reducer that is used in both outer and inner
|
||||
* aggregations.
|
||||
* @param initial_value - the initial value to use for both aggregations
|
||||
* @return A future that resolves to the result of the aggregation.
|
||||
*/
|
||||
template<typename V, typename Reducer, typename InnerMapper>
|
||||
future<V> two_dimensional_map_reduce(distributed<service::storage_proxy>& d,
|
||||
InnerMapper mapper, Reducer reducer, V initial_value) {
|
||||
return d.map_reduce0( [mapper, reducer, initial_value] (const service::storage_proxy& sp) {
|
||||
return map_reduce_scheduling_group_specific<service::storage_proxy_stats::stats>(
|
||||
mapper, reducer, initial_value, sp.get_stats_key());
|
||||
}, initial_value, reducer);
|
||||
static future<utils::rate_moving_average> sum_timed_rate(distributed<proxy>& d, utils::timed_rate_moving_average proxy::stats::*f) {
|
||||
return d.map_reduce0([f](const proxy& p) {return (p.get_stats().*f).rate();}, utils::rate_moving_average(),
|
||||
std::plus<utils::rate_moving_average>());
|
||||
}
|
||||
|
||||
/**
|
||||
* This function implements a two-dimensional map reduce where
|
||||
* the first level is a distributed storage_proxy class and the
|
||||
* second level is the stats per scheduling group class.
|
||||
* @param d - a reference to the storage_proxy distributed class.
|
||||
* @param f - a field pointer which is the implicit internal reducer.
|
||||
* @param reducer - the reducer that is used in both outer and inner
|
||||
* aggregations.
|
||||
* @param initial_value - the initial value to use for both aggregations
|
||||
* @return A future that resolves to the result of the aggregation.
|
||||
*/
|
||||
template<typename V, typename Reducer, typename F>
|
||||
future<V> two_dimensional_map_reduce(distributed<service::storage_proxy>& d,
|
||||
V F::*f, Reducer reducer, V initial_value) {
|
||||
return two_dimensional_map_reduce(d, [f] (F& stats) {
|
||||
return stats.*f;
|
||||
}, reducer, initial_value);
|
||||
}
|
||||
|
||||
/**
|
||||
* A partial Specialization of sum_stats for the storage proxy
|
||||
* case where the get stats function doesn't return a
|
||||
* stats object with fields but a per scheduling group
|
||||
* stats object, the name was also changed since functions
|
||||
* partial specialization is not supported in C++.
|
||||
*
|
||||
*/
|
||||
template<typename V, typename F>
|
||||
future<json::json_return_type> sum_stats_storage_proxy(distributed<proxy>& d, V F::*f) {
|
||||
return two_dimensional_map_reduce(d, [f] (F& stats) { return stats.*f; }, std::plus<V>(), V(0)).then([] (V val) {
|
||||
return make_ready_future<json::json_return_type>(val);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// Sums rate() of the selected timed_rate_moving_average field across the
// storage proxy stats, starting from a default-constructed rate_moving_average.
static future<utils::rate_moving_average> sum_timed_rate(distributed<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
    using stats_type = service::storage_proxy_stats::stats;
    auto rate_of = [f] (stats_type& stats) {
        return (stats.*f).rate();
    };
    return two_dimensional_map_reduce(d, std::move(rate_of),
            std::plus<utils::rate_moving_average>(), utils::rate_moving_average());
}
|
||||
|
||||
static future<json::json_return_type> sum_timed_rate_as_obj(distributed<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
|
||||
static future<json::json_return_type> sum_timed_rate_as_obj(distributed<proxy>& d, utils::timed_rate_moving_average proxy::stats::*f) {
|
||||
return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) {
|
||||
httpd::utils_json::rate_moving_average m;
|
||||
m = val;
|
||||
@@ -110,72 +51,29 @@ httpd::utils_json::rate_moving_average_and_histogram get_empty_moving_average()
|
||||
return timer_to_json(utils::rate_moving_average_and_histogram());
|
||||
}
|
||||
|
||||
static future<json::json_return_type> sum_timed_rate_as_long(distributed<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
|
||||
static future<json::json_return_type> sum_timed_rate_as_long(distributed<proxy>& d, utils::timed_rate_moving_average proxy::stats::*f) {
|
||||
return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) {
|
||||
return make_ready_future<json::json_return_type>(val.count);
|
||||
});
|
||||
}
|
||||
|
||||
static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, utils::estimated_histogram service::storage_proxy_stats::stats::*f) {
|
||||
|
||||
return two_dimensional_map_reduce(ctx.sp, f, utils::estimated_histogram_merge,
|
||||
utils::estimated_histogram()).then([](const utils::estimated_histogram& val) {
|
||||
static future<json::json_return_type> sum_estimated_histogram(http_context& ctx, utils::estimated_histogram proxy::stats::*f) {
|
||||
return ctx.sp.map_reduce0([f](const proxy& p) {return p.get_stats().*f;}, utils::estimated_histogram(),
|
||||
utils::estimated_histogram_merge).then([](const utils::estimated_histogram& val) {
|
||||
utils_json::estimated_histogram res;
|
||||
res = val;
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
}
|
||||
|
||||
static future<json::json_return_type> total_latency(http_context& ctx, utils::timed_rate_moving_average_and_histogram service::storage_proxy_stats::stats::*f) {
|
||||
return two_dimensional_map_reduce(ctx.sp, [f] (service::storage_proxy_stats::stats& stats) {
|
||||
return (stats.*f).hist.mean * (stats.*f).hist.count;
|
||||
}, std::plus<double>(), 0.0).then([](double val) {
|
||||
static future<json::json_return_type> total_latency(http_context& ctx, utils::timed_rate_moving_average_and_histogram proxy::stats::*f) {
|
||||
return ctx.sp.map_reduce0([f](const proxy& p) {return (p.get_stats().*f).hist.mean * (p.get_stats().*f).hist.count;}, 0.0,
|
||||
std::plus<double>()).then([](double val) {
|
||||
int64_t res = val;
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* A partial Specialization of sum_histogram_stats
|
||||
* for the storage proxy case where the get stats
|
||||
* function doesn't return a stats object with
|
||||
* fields but a per scheduling group stats object,
|
||||
* the name was also changed since function partial
|
||||
* specialization is not supported in C++.
|
||||
*/
|
||||
template<typename F>
|
||||
future<json::json_return_type>
|
||||
sum_histogram_stats_storage_proxy(distributed<proxy>& d,
|
||||
utils::timed_rate_moving_average_and_histogram F::*f) {
|
||||
return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {
|
||||
return (stats.*f).hist;
|
||||
}, std::plus<utils::ihistogram>(), utils::ihistogram()).
|
||||
then([](const utils::ihistogram& val) {
|
||||
return make_ready_future<json::json_return_type>(to_json(val));
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* A partial Specialization of sum_timer_stats for the
|
||||
* storage proxy case where the get stats function
|
||||
* doesn't return a stats object with fields but a
|
||||
* per scheduling group stats object, the name
|
||||
* was also changed since partial function specialization
|
||||
* is not supported in C++.
|
||||
*/
|
||||
template<typename F>
|
||||
future<json::json_return_type>
|
||||
sum_timer_stats_storage_proxy(distributed<proxy>& d,
|
||||
utils::timed_rate_moving_average_and_histogram F::*f) {
|
||||
|
||||
return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {
|
||||
return (stats.*f).rate();
|
||||
}, std::plus<utils::rate_moving_average_and_histogram>(),
|
||||
utils::rate_moving_average_and_histogram()).then([](const utils::rate_moving_average_and_histogram& val) {
|
||||
return make_ready_future<json::json_return_type>(timer_to_json(val));
|
||||
});
|
||||
}
|
||||
|
||||
void set_storage_proxy(http_context& ctx, routes& r) {
|
||||
sp::get_total_hints.set(r, [](std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
@@ -325,15 +223,15 @@ void set_storage_proxy(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
sp::get_read_repair_attempted.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_attempts);
|
||||
return sum_stats(ctx.sp, &proxy::stats::read_repair_attempts);
|
||||
});
|
||||
|
||||
sp::get_read_repair_repaired_blocking.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_repaired_blocking);
|
||||
return sum_stats(ctx.sp, &proxy::stats::read_repair_repaired_blocking);
|
||||
});
|
||||
|
||||
sp::get_read_repair_repaired_background.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read_repair_repaired_background);
|
||||
return sum_stats(ctx.sp, &proxy::stats::read_repair_repaired_background);
|
||||
});
|
||||
|
||||
sp::get_schema_versions.set(r, [](std::unique_ptr<request> req) {
|
||||
@@ -377,10 +275,6 @@ void set_storage_proxy(http_context& ctx, routes& r) {
|
||||
return sum_stats(ctx.sp, &proxy::stats::cas_write_condition_not_met);
|
||||
});
|
||||
|
||||
sp::get_cas_write_metrics_failed_read_round_optimization.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_stats(ctx.sp, &proxy::stats::cas_failed_read_round_optimization);
|
||||
});
|
||||
|
||||
sp::get_cas_read_metrics_unfinished_commit.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_stats(ctx.sp, &proxy::stats::cas_read_unfinished_commit);
|
||||
});
|
||||
@@ -390,71 +284,71 @@ void set_storage_proxy(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
sp::get_read_metrics_timeouts.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::read_timeouts);
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::read_timeouts);
|
||||
});
|
||||
|
||||
sp::get_read_metrics_unavailables.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::read_unavailables);
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::read_unavailables);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_timeouts.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::range_slice_timeouts);
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::range_slice_timeouts);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_unavailables.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::range_slice_unavailables);
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::range_slice_unavailables);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_timeouts.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::write_timeouts);
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::write_timeouts);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_unavailables.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_long(ctx.sp, &service::storage_proxy_stats::stats::write_unavailables);
|
||||
return sum_timed_rate_as_long(ctx.sp, &proxy::stats::write_unavailables);
|
||||
});
|
||||
|
||||
sp::get_read_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::read_timeouts);
|
||||
return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::read_timeouts);
|
||||
});
|
||||
|
||||
sp::get_read_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::read_unavailables);
|
||||
return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::read_unavailables);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::range_slice_timeouts);
|
||||
return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::range_slice_timeouts);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::range_slice_unavailables);
|
||||
return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::range_slice_unavailables);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_timeouts_rates.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::write_timeouts);
|
||||
return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::write_timeouts);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_unavailables_rates.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timed_rate_as_obj(ctx.sp, &service::storage_proxy_stats::stats::write_unavailables);
|
||||
return sum_timed_rate_as_obj(ctx.sp, &proxy::stats::write_unavailables);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
|
||||
return sum_histogram_stats(ctx.sp, &proxy::stats::range);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::write);
|
||||
return sum_histogram_stats(ctx.sp, &proxy::stats::write);
|
||||
});
|
||||
|
||||
sp::get_read_metrics_latency_histogram_depricated.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_histogram_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read);
|
||||
return sum_histogram_stats(ctx.sp, &proxy::stats::read);
|
||||
});
|
||||
|
||||
sp::get_range_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
|
||||
return sum_timer_stats(ctx.sp, &proxy::stats::range);
|
||||
});
|
||||
|
||||
sp::get_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::write);
|
||||
return sum_timer_stats(ctx.sp, &proxy::stats::write);
|
||||
});
|
||||
sp::get_cas_write_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timer_stats(ctx.sp, &proxy::stats::cas_write);
|
||||
@@ -473,30 +367,30 @@ void set_storage_proxy(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
sp::get_read_metrics_latency_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::read);
|
||||
return sum_timer_stats(ctx.sp, &proxy::stats::read);
|
||||
});
|
||||
|
||||
sp::get_read_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::estimated_read);
|
||||
return sum_estimated_histogram(ctx, &proxy::stats::estimated_read);
|
||||
});
|
||||
|
||||
sp::get_read_latency.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return total_latency(ctx, &service::storage_proxy_stats::stats::read);
|
||||
return total_latency(ctx, &proxy::stats::read);
|
||||
});
|
||||
sp::get_write_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_estimated_histogram(ctx, &service::storage_proxy_stats::stats::estimated_write);
|
||||
return sum_estimated_histogram(ctx, &proxy::stats::estimated_write);
|
||||
});
|
||||
|
||||
sp::get_write_latency.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return total_latency(ctx, &service::storage_proxy_stats::stats::write);
|
||||
return total_latency(ctx, &proxy::stats::write);
|
||||
});
|
||||
|
||||
sp::get_range_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return sum_timer_stats_storage_proxy(ctx.sp, &service::storage_proxy_stats::stats::range);
|
||||
return sum_timer_stats(ctx.sp, &proxy::stats::range);
|
||||
});
|
||||
|
||||
sp::get_range_latency.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return total_latency(ctx, &service::storage_proxy_stats::stats::range);
|
||||
return total_latency(ctx, &proxy::stats::range);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -42,6 +42,8 @@
|
||||
#include "database.hh"
|
||||
#include "db/extensions.hh"
|
||||
|
||||
sstables::sstable::version_types get_highest_supported_format();
|
||||
|
||||
namespace api {
|
||||
|
||||
namespace ss = httpd::storage_service_json;
|
||||
@@ -72,35 +74,35 @@ static ss::token_range token_range_endpoints_to_json(const dht::token_range_endp
|
||||
return r;
|
||||
}
|
||||
|
||||
using ks_cf_func = std::function<future<json::json_return_type>(http_context&, std::unique_ptr<request>, sstring, std::vector<sstring>)>;
|
||||
|
||||
static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
|
||||
return [&ctx, f = std::move(f)](std::unique_ptr<request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto column_families = split_cf(req->get_query_param("cf"));
|
||||
if (column_families.empty()) {
|
||||
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
|
||||
}
|
||||
return f(ctx, std::move(req), std::move(keyspace), std::move(column_families));
|
||||
};
|
||||
}
|
||||
|
||||
void set_storage_service(http_context& ctx, routes& r) {
|
||||
using ks_cf_func = std::function<future<json::json_return_type>(std::unique_ptr<request>, sstring, std::vector<sstring>)>;
|
||||
|
||||
auto wrap_ks_cf = [&ctx](ks_cf_func f) {
|
||||
return [&ctx, f = std::move(f)](std::unique_ptr<request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto column_families = split_cf(req->get_query_param("cf"));
|
||||
if (column_families.empty()) {
|
||||
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
|
||||
}
|
||||
return f(std::move(req), std::move(keyspace), std::move(column_families));
|
||||
};
|
||||
};
|
||||
|
||||
ss::local_hostid.set(r, [](std::unique_ptr<request> req) {
|
||||
return db::system_keyspace::get_local_host_id().then([](const utils::UUID& id) {
|
||||
return make_ready_future<json::json_return_type>(id.to_sstring());
|
||||
});
|
||||
});
|
||||
|
||||
ss::get_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().sorted_tokens(), [](const dht::token& i) {
|
||||
ss::get_tokens.set(r, [] (std::unique_ptr<request> req) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(service::get_local_storage_service().get_token_metadata().sorted_tokens(), [](const dht::token& i) {
|
||||
return boost::lexical_cast<std::string>(i);
|
||||
}));
|
||||
});
|
||||
|
||||
ss::get_node_tokens.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
ss::get_node_tokens.set(r, [] (std::unique_ptr<request> req) {
|
||||
gms::inet_address addr(req->param["endpoint"]);
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(ctx.token_metadata.local().get_tokens(addr), [](const dht::token& i) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(service::get_local_storage_service().get_token_metadata().get_tokens(addr), [](const dht::token& i) {
|
||||
return boost::lexical_cast<std::string>(i);
|
||||
}));
|
||||
});
|
||||
@@ -118,8 +120,8 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
}));
|
||||
});
|
||||
|
||||
ss::get_leaving_nodes.set(r, [&ctx](const_req req) {
|
||||
return container_to_vec(ctx.token_metadata.local().get_leaving_endpoints());
|
||||
ss::get_leaving_nodes.set(r, [](const_req req) {
|
||||
return container_to_vec(service::get_local_storage_service().get_token_metadata().get_leaving_endpoints());
|
||||
});
|
||||
|
||||
ss::get_moving_nodes.set(r, [](const_req req) {
|
||||
@@ -127,8 +129,8 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
return container_to_vec(addr);
|
||||
});
|
||||
|
||||
ss::get_joining_nodes.set(r, [&ctx](const_req req) {
|
||||
auto points = ctx.token_metadata.local().get_bootstrap_tokens();
|
||||
ss::get_joining_nodes.set(r, [](const_req req) {
|
||||
auto points = service::get_local_storage_service().get_token_metadata().get_bootstrap_tokens();
|
||||
std::unordered_set<sstring> addr;
|
||||
for (auto i: points) {
|
||||
addr.insert(boost::lexical_cast<std::string>(i.second));
|
||||
@@ -180,9 +182,10 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
return make_ready_future<json::json_return_type>(stream_range_as_array(service::get_local_storage_service().describe_ring(keyspace), token_range_endpoints_to_json));
|
||||
});
|
||||
|
||||
ss::get_host_id_map.set(r, [&ctx](const_req req) {
|
||||
ss::get_host_id_map.set(r, [](const_req req) {
|
||||
std::vector<ss::mapper> res;
|
||||
return map_to_key_value(ctx.token_metadata.local().get_endpoint_to_host_id_map_for_reading(), res);
|
||||
return map_to_key_value(service::get_local_storage_service().
|
||||
get_token_metadata().get_endpoint_to_host_id_map_for_reading(), res);
|
||||
});
|
||||
|
||||
ss::get_load.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
@@ -215,6 +218,67 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
req.get_query_param("key")));
|
||||
});
|
||||
|
||||
ss::get_snapshot_details.set(r, [](std::unique_ptr<request> req) {
|
||||
return service::get_local_storage_service().get_snapshot_details().then([] (auto result) {
|
||||
std::vector<ss::snapshots> res;
|
||||
for (auto& map: result) {
|
||||
ss::snapshots all_snapshots;
|
||||
all_snapshots.key = map.first;
|
||||
|
||||
std::vector<ss::snapshot> snapshot;
|
||||
for (auto& cf: map.second) {
|
||||
ss::snapshot s;
|
||||
s.ks = cf.ks;
|
||||
s.cf = cf.cf;
|
||||
s.live = cf.live;
|
||||
s.total = cf.total;
|
||||
snapshot.push_back(std::move(s));
|
||||
}
|
||||
all_snapshots.value = std::move(snapshot);
|
||||
res.push_back(std::move(all_snapshots));
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(std::move(res));
|
||||
});
|
||||
});
|
||||
|
||||
ss::take_snapshot.set(r, [](std::unique_ptr<request> req) {
|
||||
auto tag = req->get_query_param("tag");
|
||||
auto column_family = req->get_query_param("cf");
|
||||
|
||||
std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
|
||||
|
||||
auto resp = make_ready_future<>();
|
||||
if (column_family.empty()) {
|
||||
resp = service::get_local_storage_service().take_snapshot(tag, keynames);
|
||||
} else {
|
||||
if (keynames.empty()) {
|
||||
throw httpd::bad_param_exception("The keyspace of column families must be specified");
|
||||
}
|
||||
if (keynames.size() > 1) {
|
||||
throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
|
||||
}
|
||||
resp = service::get_local_storage_service().take_column_family_snapshot(keynames[0], column_family, tag);
|
||||
}
|
||||
return resp.then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
ss::del_snapshot.set(r, [](std::unique_ptr<request> req) {
|
||||
auto tag = req->get_query_param("tag");
|
||||
|
||||
std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
|
||||
return service::get_local_storage_service().clear_snapshot(tag, keynames).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
ss::true_snapshots_size.set(r, [](std::unique_ptr<request> req) {
|
||||
return service::get_local_storage_service().true_snapshots_size().then([] (int64_t size) {
|
||||
return make_ready_future<json::json_return_type>(size);
|
||||
});
|
||||
});
|
||||
|
||||
ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto column_families = split_cf(req->get_query_param("cf"));
|
||||
@@ -252,8 +316,8 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
for (auto cf : column_families) {
|
||||
column_families_vec.push_back(&db.find_column_family(keyspace, cf));
|
||||
}
|
||||
return parallel_for_each(column_families_vec, [&cm, &db] (column_family* cf) {
|
||||
return cm.perform_cleanup(db, cf);
|
||||
return parallel_for_each(column_families_vec, [&cm] (column_family* cf) {
|
||||
return cm.perform_cleanup(cf);
|
||||
});
|
||||
}).then([]{
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
@@ -261,7 +325,32 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
});
|
||||
});
|
||||
|
||||
ss::upgrade_sstables.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
|
||||
ss::scrub.set(r, wrap_ks_cf([&ctx](std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
|
||||
// TODO: respect this
|
||||
auto skip_corrupted = req->get_query_param("skip_corrupted");
|
||||
|
||||
auto f = make_ready_future<>();
|
||||
if (!req_param<bool>(*req, "disable_snapshot", false)) {
|
||||
auto tag = format("pre-scrub-{:d}", db_clock::now().time_since_epoch().count());
|
||||
f = parallel_for_each(column_families, [keyspace, tag](sstring cf) {
|
||||
return service::get_local_storage_service().take_column_family_snapshot(keyspace, cf, tag);
|
||||
});
|
||||
}
|
||||
|
||||
return f.then([&ctx, keyspace, column_families] {
|
||||
return ctx.db.invoke_on_all([=] (database& db) {
|
||||
return do_for_each(column_families, [=, &db](sstring cfname) {
|
||||
auto& cm = db.get_compaction_manager();
|
||||
auto& cf = db.find_column_family(keyspace, cfname);
|
||||
return cm.perform_sstable_scrub(&cf);
|
||||
});
|
||||
});
|
||||
}).then([]{
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
}));
|
||||
|
||||
ss::upgrade_sstables.set(r, wrap_ks_cf([&ctx](std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
|
||||
bool exclude_current_version = req_param<bool>(*req, "exclude_current_version", false);
|
||||
|
||||
return ctx.db.invoke_on_all([=] (database& db) {
|
||||
@@ -947,107 +1036,4 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
|
||||
}
|
||||
|
||||
void set_snapshot(http_context& ctx, routes& r) {
|
||||
ss::get_snapshot_details.set(r, [](std::unique_ptr<request> req) {
|
||||
std::function<future<>(output_stream<char>&&)> f = [](output_stream<char>&& s) {
|
||||
return do_with(output_stream<char>(std::move(s)), true, [] (output_stream<char>& s, bool& first){
|
||||
return s.write("[").then([&s, &first] {
|
||||
return service::get_local_storage_service().get_snapshot_details().then([&s, &first] (std::unordered_map<sstring, std::vector<service::storage_service::snapshot_details>>&& result) {
|
||||
return do_with(std::move(result), [&s, &first](const std::unordered_map<sstring, std::vector<service::storage_service::snapshot_details>>& result) {
|
||||
return do_for_each(result, [&s, &result,&first](std::tuple<sstring, std::vector<service::storage_service::snapshot_details>>&& map){
|
||||
return do_with(ss::snapshots(), [&s, &first, &result, &map](ss::snapshots& all_snapshots) {
|
||||
all_snapshots.key = std::get<0>(map);
|
||||
future<> f = first ? make_ready_future<>() : s.write(", ");
|
||||
first = false;
|
||||
std::vector<ss::snapshot> snapshot;
|
||||
for (auto& cf: std::get<1>(map)) {
|
||||
ss::snapshot snp;
|
||||
snp.ks = cf.ks;
|
||||
snp.cf = cf.cf;
|
||||
snp.live = cf.live;
|
||||
snp.total = cf.total;
|
||||
snapshot.push_back(std::move(snp));
|
||||
}
|
||||
all_snapshots.value = std::move(snapshot);
|
||||
return f.then([&s, &all_snapshots] {
|
||||
return all_snapshots.write(s);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}).then([&s] {
|
||||
return s.write("]").then([&s] {
|
||||
return s.close();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
};
|
||||
return make_ready_future<json::json_return_type>(std::move(f));
|
||||
});
|
||||
|
||||
ss::take_snapshot.set(r, [](std::unique_ptr<request> req) {
|
||||
auto tag = req->get_query_param("tag");
|
||||
auto column_family = req->get_query_param("cf");
|
||||
|
||||
std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
|
||||
|
||||
auto resp = make_ready_future<>();
|
||||
if (column_family.empty()) {
|
||||
resp = service::get_local_storage_service().take_snapshot(tag, keynames);
|
||||
} else {
|
||||
if (keynames.empty()) {
|
||||
throw httpd::bad_param_exception("The keyspace of column families must be specified");
|
||||
}
|
||||
if (keynames.size() > 1) {
|
||||
throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
|
||||
}
|
||||
resp = service::get_local_storage_service().take_column_family_snapshot(keynames[0], column_family, tag);
|
||||
}
|
||||
return resp.then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
ss::del_snapshot.set(r, [](std::unique_ptr<request> req) {
|
||||
auto tag = req->get_query_param("tag");
|
||||
auto column_family = req->get_query_param("cf");
|
||||
|
||||
std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
|
||||
return service::get_local_storage_service().clear_snapshot(tag, keynames, column_family).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
ss::true_snapshots_size.set(r, [](std::unique_ptr<request> req) {
|
||||
return service::get_local_storage_service().true_snapshots_size().then([] (int64_t size) {
|
||||
return make_ready_future<json::json_return_type>(size);
|
||||
});
|
||||
});
|
||||
|
||||
ss::scrub.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
|
||||
const auto skip_corrupted = req_param<bool>(*req, "skip_corrupted", false);
|
||||
|
||||
auto f = make_ready_future<>();
|
||||
if (!req_param<bool>(*req, "disable_snapshot", false)) {
|
||||
auto tag = format("pre-scrub-{:d}", db_clock::now().time_since_epoch().count());
|
||||
f = parallel_for_each(column_families, [keyspace, tag](sstring cf) {
|
||||
return service::get_local_storage_service().take_column_family_snapshot(keyspace, cf, tag);
|
||||
});
|
||||
}
|
||||
|
||||
return f.then([&ctx, keyspace, column_families, skip_corrupted] {
|
||||
return ctx.db.invoke_on_all([=] (database& db) {
|
||||
return do_for_each(column_families, [=, &db](sstring cfname) {
|
||||
auto& cm = db.get_compaction_manager();
|
||||
auto& cf = db.find_column_family(keyspace, cfname);
|
||||
return cm.perform_sstable_scrub(&cf, skip_corrupted);
|
||||
});
|
||||
});
|
||||
}).then([]{
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
}));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -26,6 +26,5 @@
|
||||
namespace api {
|
||||
|
||||
void set_storage_service(http_context& ctx, routes& r);
|
||||
void set_snapshot(http_context& ctx, routes& r);
|
||||
|
||||
}
|
||||
|
||||
@@ -52,7 +52,7 @@ public:
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
virtual std::string_view qualified_java_name() const override {
|
||||
virtual const sstring& qualified_java_name() const override {
|
||||
return allow_all_authenticator_name();
|
||||
}
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ public:
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
virtual std::string_view qualified_java_name() const override {
|
||||
virtual const sstring& qualified_java_name() const override {
|
||||
return allow_all_authorizer_name();
|
||||
}
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ public:
|
||||
///
|
||||
/// A fully-qualified (class with package) Java-like name for this implementation.
|
||||
///
|
||||
virtual std::string_view qualified_java_name() const = 0;
|
||||
virtual const sstring& qualified_java_name() const = 0;
|
||||
|
||||
virtual bool require_authentication() const = 0;
|
||||
|
||||
|
||||
@@ -100,7 +100,7 @@ public:
|
||||
///
|
||||
/// A fully-qualified (class with package) Java-like name for this implementation.
|
||||
///
|
||||
virtual std::string_view qualified_java_name() const = 0;
|
||||
virtual const sstring& qualified_java_name() const = 0;
|
||||
|
||||
///
|
||||
/// Query for the permissions granted directly to a role for a particular \ref resource (and not any of its
|
||||
|
||||
@@ -59,7 +59,7 @@ future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_f
|
||||
}).discard_result();
|
||||
}
|
||||
|
||||
static future<> create_metadata_table_if_missing_impl(
|
||||
future<> create_metadata_table_if_missing(
|
||||
std::string_view table_name,
|
||||
cql3::query_processor& qp,
|
||||
std::string_view cql,
|
||||
@@ -85,14 +85,7 @@ static future<> create_metadata_table_if_missing_impl(
|
||||
return ignore_existing([&mm, table = std::move(table)] () {
|
||||
return mm.announce_new_column_family(table, false);
|
||||
});
|
||||
}
|
||||
|
||||
future<> create_metadata_table_if_missing(
|
||||
std::string_view table_name,
|
||||
cql3::query_processor& qp,
|
||||
std::string_view cql,
|
||||
::service::migration_manager& mm) noexcept {
|
||||
return futurize_apply(create_metadata_table_if_missing_impl, table_name, qp, cql, mm);
|
||||
}
|
||||
|
||||
future<> wait_for_schema_agreement(::service::migration_manager& mm, const database& db, seastar::abort_source& as) {
|
||||
|
||||
@@ -79,7 +79,7 @@ future<> create_metadata_table_if_missing(
|
||||
std::string_view table_name,
|
||||
cql3::query_processor&,
|
||||
std::string_view cql,
|
||||
::service::migration_manager&) noexcept;
|
||||
::service::migration_manager&);
|
||||
|
||||
future<> wait_for_schema_agreement(::service::migration_manager&, const database&, seastar::abort_source&);
|
||||
|
||||
|
||||
@@ -101,7 +101,7 @@ bool default_authorizer::legacy_metadata_exists() const {
|
||||
future<bool> default_authorizer::any_granted() const {
|
||||
static const sstring query = format("SELECT * FROM {}.{} LIMIT 1", meta::AUTH_KS, PERMISSIONS_CF);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config,
|
||||
@@ -115,7 +115,7 @@ future<> default_authorizer::migrate_legacy_metadata() const {
|
||||
alogger.info("Starting migration of legacy permissions metadata.");
|
||||
static const sstring query = format("SELECT * FROM {}.{}", meta::AUTH_KS, legacy_table_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
@@ -195,7 +195,7 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config,
|
||||
@@ -224,7 +224,7 @@ default_authorizer::modify(
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME),
|
||||
[this, &role_name, set, &resource](const auto& query) {
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -249,7 +249,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
|
||||
meta::AUTH_KS,
|
||||
PERMISSIONS_CF);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -276,7 +276,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name) const {
|
||||
PERMISSIONS_CF,
|
||||
ROLE_NAME);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -296,7 +296,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
|
||||
PERMISSIONS_CF,
|
||||
RESOURCE_NAME);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config,
|
||||
@@ -313,7 +313,7 @@ future<> default_authorizer::revoke_all(const resource& resource) const {
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
infinite_timeout_config,
|
||||
|
||||
@@ -71,7 +71,7 @@ public:
|
||||
|
||||
virtual future<> stop() override;
|
||||
|
||||
virtual std::string_view qualified_java_name() const override {
|
||||
virtual const sstring& qualified_java_name() const override {
|
||||
return default_authorizer_name();
|
||||
}
|
||||
|
||||
|
||||
@@ -96,13 +96,10 @@ static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
|
||||
return !row.get_or<sstring>(SALTED_HASH, "").empty();
|
||||
}
|
||||
|
||||
static const sstring& update_row_query() {
|
||||
static const sstring update_row_query = format("UPDATE {} SET {} = ? WHERE {} = ?",
|
||||
meta::roles_table::qualified_name(),
|
||||
SALTED_HASH,
|
||||
meta::roles_table::role_col_name);
|
||||
return update_row_query;
|
||||
}
|
||||
static const sstring update_row_query = format("UPDATE {} SET {} = ? WHERE {} = ?",
|
||||
meta::roles_table::qualified_name(),
|
||||
SALTED_HASH,
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
static const sstring legacy_table_name{"credentials"};
|
||||
|
||||
@@ -114,7 +111,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
plogger.info("Starting migration of legacy authentication metadata.");
|
||||
static const sstring query = format("SELECT * FROM {}.{}", meta::AUTH_KS, legacy_table_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
@@ -122,8 +119,8 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
auto username = row.get_as<sstring>("username");
|
||||
auto salted_hash = row.get_as<sstring>(SALTED_HASH);
|
||||
|
||||
return _qp.execute_internal(
|
||||
update_row_query(),
|
||||
return _qp.process(
|
||||
update_row_query,
|
||||
consistency_for_user(username),
|
||||
internal_distributed_timeout_config(),
|
||||
{std::move(salted_hash), username}).discard_result();
|
||||
@@ -139,8 +136,8 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
future<> password_authenticator::create_default_if_missing() const {
|
||||
return default_role_row_satisfies(_qp, &has_salted_hash).then([this](bool exists) {
|
||||
if (!exists) {
|
||||
return _qp.execute_internal(
|
||||
update_row_query(),
|
||||
return _qp.process(
|
||||
update_row_query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_timeout_config(),
|
||||
{passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt), DEFAULT_USER_NAME}).then([](auto&&) {
|
||||
@@ -197,7 +194,7 @@ db::consistency_level password_authenticator::consistency_for_user(std::string_v
|
||||
return db::consistency_level::LOCAL_ONE;
|
||||
}
|
||||
|
||||
std::string_view password_authenticator::qualified_java_name() const {
|
||||
const sstring& password_authenticator::qualified_java_name() const {
|
||||
return password_authenticator_name();
|
||||
}
|
||||
|
||||
@@ -236,7 +233,7 @@ future<authenticated_user> password_authenticator::authenticate(
|
||||
meta::roles_table::qualified_name(),
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_user(username),
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -270,8 +267,8 @@ future<> password_authenticator::create(std::string_view role_name, const authen
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
return _qp.execute_internal(
|
||||
update_row_query(),
|
||||
return _qp.process(
|
||||
update_row_query,
|
||||
consistency_for_user(role_name),
|
||||
internal_distributed_timeout_config(),
|
||||
{passwords::hash(*options.password, rng_for_salt), sstring(role_name)}).discard_result();
|
||||
@@ -287,7 +284,7 @@ future<> password_authenticator::alter(std::string_view role_name, const authent
|
||||
SALTED_HASH,
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_user(role_name),
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -300,7 +297,7 @@ future<> password_authenticator::drop(std::string_view name) const {
|
||||
meta::roles_table::qualified_name(),
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query, consistency_for_user(name),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(name)}).discard_result();
|
||||
|
||||
@@ -71,7 +71,7 @@ public:
|
||||
|
||||
virtual future<> stop() override;
|
||||
|
||||
virtual std::string_view qualified_java_name() const override;
|
||||
virtual const sstring& qualified_java_name() const override;
|
||||
|
||||
virtual bool require_authentication() const override;
|
||||
|
||||
|
||||
@@ -68,14 +68,14 @@ future<bool> default_role_row_satisfies(
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return do_with(std::move(p), [&qp](const auto& p) {
|
||||
return qp.execute_internal(
|
||||
return qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
true).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
return qp.execute_internal(
|
||||
return qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -100,7 +100,7 @@ future<bool> any_nondefault_role_row_satisfies(
|
||||
static const sstring query = format("SELECT * FROM {}", meta::roles_table::qualified_name());
|
||||
|
||||
return do_with(std::move(p), [&qp](const auto& p) {
|
||||
return qp.execute_internal(
|
||||
return qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_timeout_config()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
|
||||
@@ -194,10 +194,7 @@ future<> service::stop() {
|
||||
// Only one of the shards has the listener registered, but let's try to
|
||||
// unregister on each one just to make sure.
|
||||
return _mnotifier.unregister_listener(_migration_listener.get()).then([this] {
|
||||
if (_permissions_cache) {
|
||||
return _permissions_cache->stop();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
return _permissions_cache->stop();
|
||||
}).then([this] {
|
||||
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop());
|
||||
});
|
||||
@@ -220,7 +217,7 @@ future<bool> service::has_existing_legacy_users() const {
|
||||
// This logic is borrowed directly from Apache Cassandra. By first checking for the presence of the default user, we
|
||||
// can potentially avoid doing a range query with a high consistency level.
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
default_user_query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
@@ -230,7 +227,7 @@ future<bool> service::has_existing_legacy_users() const {
|
||||
return make_ready_future<bool>(true);
|
||||
}
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
default_user_query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config,
|
||||
@@ -240,7 +237,7 @@ future<bool> service::has_existing_legacy_users() const {
|
||||
return make_ready_future<bool>(true);
|
||||
}
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
all_users_query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config).then([](auto results) {
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
#include "auth/common.hh"
|
||||
#include "auth/roles-metadata.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "log.hh"
|
||||
@@ -87,7 +86,7 @@ static future<std::optional<record>> find_record(cql3::query_processor& qp, std:
|
||||
meta::roles_table::qualified_name(),
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return qp.execute_internal(
|
||||
return qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -171,7 +170,7 @@ future<> standard_role_manager::create_default_role_if_missing() const {
|
||||
meta::roles_table::qualified_name(),
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -198,7 +197,7 @@ future<> standard_role_manager::migrate_legacy_metadata() const {
|
||||
log.info("Starting migration of legacy user metadata.");
|
||||
static const sstring query = format("SELECT * FROM {}.{}", meta::AUTH_KS, legacy_table_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
@@ -259,7 +258,7 @@ future<> standard_role_manager::create_or_replace(std::string_view role_name, co
|
||||
meta::roles_table::qualified_name(),
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -299,7 +298,7 @@ standard_role_manager::alter(std::string_view role_name, const role_config_updat
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
format("UPDATE {} SET {} WHERE {} = ?",
|
||||
meta::roles_table::qualified_name(),
|
||||
build_column_assignments(u),
|
||||
@@ -321,7 +320,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
|
||||
static const sstring query = format("SELECT member FROM {} WHERE role = ?",
|
||||
meta::role_members_table::qualified_name());
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -360,7 +359,7 @@ future<> standard_role_manager::drop(std::string_view role_name) const {
|
||||
meta::roles_table::qualified_name(),
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -387,7 +386,7 @@ standard_role_manager::modify_membership(
|
||||
(ch == membership_change::add ? '+' : '-'),
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(grantee_name),
|
||||
internal_distributed_timeout_config(),
|
||||
@@ -397,7 +396,7 @@ standard_role_manager::modify_membership(
|
||||
const auto modify_role_members = [this, role_name, grantee_name, ch] {
|
||||
switch (ch) {
|
||||
case membership_change::add:
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
format("INSERT INTO {} (role, member) VALUES (?, ?)",
|
||||
meta::role_members_table::qualified_name()),
|
||||
consistency_for_role(role_name),
|
||||
@@ -405,7 +404,7 @@ standard_role_manager::modify_membership(
|
||||
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
||||
|
||||
case membership_change::remove:
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
format("DELETE FROM {} WHERE role = ? AND member = ?",
|
||||
meta::role_members_table::qualified_name()),
|
||||
consistency_for_role(role_name),
|
||||
@@ -509,7 +508,7 @@ future<role_set> standard_role_manager::query_all() const {
|
||||
// To avoid many copies of a view.
|
||||
static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_timeout_config()).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
|
||||
@@ -82,7 +82,7 @@ public:
|
||||
return _authenticator->stop();
|
||||
}
|
||||
|
||||
virtual std::string_view qualified_java_name() const override {
|
||||
virtual const sstring& qualified_java_name() const override {
|
||||
return transitional_authenticator_name();
|
||||
}
|
||||
|
||||
@@ -201,7 +201,7 @@ public:
|
||||
return _authorizer->stop();
|
||||
}
|
||||
|
||||
virtual std::string_view qualified_java_name() const override {
|
||||
virtual const sstring& qualified_java_name() const override {
|
||||
return transitional_authorizer_name();
|
||||
}
|
||||
|
||||
|
||||
@@ -23,11 +23,7 @@
|
||||
#include <seastar/core/scheduling.hh>
|
||||
#include <seastar/core/timer.hh>
|
||||
#include <seastar/core/gate.hh>
|
||||
#include <seastar/core/file.hh>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
|
||||
#include "seastarx.hh"
|
||||
|
||||
// Simple proportional controller to adjust shares for processes for which a backlog can be clearly
|
||||
// defined.
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#include <link.h>
|
||||
#include <seastar/core/align.hh>
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
|
||||
using namespace seastar;
|
||||
|
||||
|
||||
2
bytes.cc
2
bytes.cc
@@ -64,7 +64,7 @@ bytes from_hex(sstring_view s) {
|
||||
|
||||
sstring to_hex(bytes_view b) {
|
||||
static char digits[] = "0123456789abcdef";
|
||||
sstring out = uninitialized_string(b.size() * 2);
|
||||
sstring out(sstring::initialized_later(), b.size() * 2);
|
||||
unsigned end = b.size();
|
||||
for (unsigned i = 0; i != end; ++i) {
|
||||
uint8_t x = b[i];
|
||||
|
||||
@@ -92,7 +92,7 @@ mutation canonical_mutation::to_mutation(schema_ptr s) const {
|
||||
}
|
||||
|
||||
static sstring bytes_to_text(bytes_view bv) {
|
||||
sstring ret = uninitialized_string(bv.size());
|
||||
sstring ret(sstring::initialized_later(), bv.size());
|
||||
std::copy_n(reinterpret_cast<const char*>(bv.data()), bv.size(), ret.data());
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "bytes.hh"
|
||||
#include "schema_fwd.hh"
|
||||
#include "schema.hh"
|
||||
#include "database_fwd.hh"
|
||||
#include "mutation_partition_visitor.hh"
|
||||
#include "mutation_partition_serializer.hh"
|
||||
|
||||
@@ -22,9 +22,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <sys/types.h>
|
||||
|
||||
// Single-pass range over cartesian product of vectors.
|
||||
|
||||
// Note:
|
||||
|
||||
835
cdc/cdc.cc
Normal file
835
cdc/cdc.cc
Normal file
@@ -0,0 +1,835 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <algorithm>
#include <stdexcept>
#include <string>
#include <utility>

#include <boost/range/irange.hpp>
#include <seastar/util/defer.hh>
#include <seastar/core/thread.hh>

#include "cdc/cdc.hh"
#include "bytes.hh"
#include "database.hh"
#include "db/config.hh"
#include "dht/murmur3_partitioner.hh"
#include "partition_slice_builder.hh"
#include "schema.hh"
#include "schema_builder.hh"
#include "service/migration_listener.hh"
#include "service/storage_service.hh"
#include "types/tuple.hh"
#include "cql3/statements/select_statement.hh"
#include "cql3/multi_column_relation.hh"
#include "cql3/tuples.hh"
#include "log.hh"
#include "json.hh"
|
||||
|
||||
using locator::snitch_ptr;
|
||||
using locator::token_metadata;
|
||||
using locator::topology;
|
||||
using seastar::sstring;
|
||||
using service::migration_notifier;
|
||||
using service::storage_proxy;
|
||||
|
||||
namespace std {
|
||||
|
||||
template<> struct hash<std::pair<net::inet_address, unsigned int>> {
|
||||
std::size_t operator()(const std::pair<net::inet_address, unsigned int> &p) const {
|
||||
return std::hash<net::inet_address>{}(p.first) ^ std::hash<int>{}(p.second);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
static logging::logger cdc_log("cdc");
|
||||
|
||||
namespace cdc {
|
||||
static schema_ptr create_log_schema(const schema&, std::optional<utils::UUID> = {});
|
||||
static schema_ptr create_stream_description_table_schema(const schema&, std::optional<utils::UUID> = {});
|
||||
static future<> populate_desc(db_context ctx, const schema& s);
|
||||
}
|
||||
|
||||
// Private implementation of cdc_service.
//
// The object registers itself as a migration listener on construction and
// keeps the CDC log table and the CDC stream description table of every
// CDC-enabled base table in step with schema changes of that base table:
// the on_before_* hooks add the schema mutations for the two auxiliary
// tables to the mutation set being assembled for the base table.
//
// stop() must have completed before the object is destroyed; the destructor
// asserts this.
class cdc::cdc_service::impl : service::migration_listener::empty_listener {
    friend cdc_service;
    db_context _ctxt;
    bool _stopped = false;

    // Moves every element of `src` to the back of `dst`.
    template <typename Mutations>
    static void move_append(std::vector<mutation>& dst, Mutations& src) {
        dst.insert(dst.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end()));
    }
public:
    impl(db_context ctxt)
        : _ctxt(std::move(ctxt))
    {
        _ctxt._migration_notifier.register_listener(this);
    }

    ~impl() {
        assert(_stopped);
    }

    // Unregisters this listener; the object is marked as stopped once the
    // unregistration future resolves.
    future<> stop() {
        auto unregistered = _ctxt._migration_notifier.unregister_listener(this);
        return unregistered.then([this] {
            _stopped = true;
        });
    }

    // When a CDC-enabled table is created and its log table does not exist
    // yet, appends the "create table" mutations for the log table and the
    // stream description table to `mutations`.
    void on_before_create_column_family(const schema& schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
        if (!schema.cdc_options().enabled()) {
            return;
        }
        auto& db = _ctxt._proxy.get_db().local();
        auto log_table_name = log_name(schema.cf_name());
        if (db.has_schema(schema.ks_name(), log_table_name)) {
            return;
        }

        // Original author's note: in seastar thread
        auto new_log_schema = create_log_schema(schema);
        auto new_desc_schema = create_stream_description_table_schema(schema);
        auto& ks = db.find_keyspace(schema.ks_name());

        auto log_muts = db::schema_tables::make_create_table_mutations(ks.metadata(), new_log_schema, timestamp);
        auto desc_muts = db::schema_tables::make_create_table_mutations(ks.metadata(), new_desc_schema, timestamp);

        move_append(mutations, log_muts);
        move_append(mutations, desc_muts);
    }

    // Keeps the auxiliary tables in step with an ALTER of the base table.
    //
    // Nothing is done when CDC is off both before and after the change.
    // Otherwise the auxiliary tables are dropped (CDC switched off),
    // created (CDC switched on) or updated (CDC stays on, because base
    // schema changes affect the log columns).
    void on_before_update_column_family(const schema& new_schema, const schema& old_schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
        const bool is_cdc = new_schema.cdc_options().enabled();
        const bool was_cdc = old_schema.cdc_options().enabled();
        if (!was_cdc && !is_cdc) {
            return;
        }

        auto log_table_name = log_name(old_schema.cf_name());
        auto desc_table_name = desc_name(old_schema.cf_name());
        auto& db = _ctxt._proxy.get_db().local();
        auto& ks = db.find_keyspace(old_schema.ks_name());
        // The existing auxiliary schemas are only looked up when CDC was
        // already enabled; otherwise both stay null.
        auto old_log_schema = was_cdc ? db.find_column_family(old_schema.ks_name(), log_table_name).schema() : nullptr;
        auto old_desc_schema = was_cdc ? db.find_column_family(old_schema.ks_name(), desc_table_name).schema() : nullptr;

        if (!is_cdc) {
            // CDC was switched off: drop both auxiliary tables.
            auto log_muts = db::schema_tables::make_drop_table_mutations(ks.metadata(), old_log_schema, timestamp);
            auto desc_muts = db::schema_tables::make_drop_table_mutations(ks.metadata(), old_desc_schema, timestamp);

            move_append(mutations, log_muts);
            move_append(mutations, desc_muts);
            return;
        }

        // Re-use the ids of the existing auxiliary tables, if any.
        std::optional<utils::UUID> log_id;
        if (old_log_schema) {
            log_id = old_log_schema->id();
        }
        auto new_log_schema = create_log_schema(new_schema, log_id);

        std::optional<utils::UUID> desc_id;
        if (old_desc_schema) {
            desc_id = old_desc_schema->id();
        }
        auto new_desc_schema = create_stream_description_table_schema(new_schema, desc_id);

        auto log_muts = old_log_schema
            ? db::schema_tables::make_update_table_mutations(ks.metadata(), old_log_schema, new_log_schema, timestamp, false)
            : db::schema_tables::make_create_table_mutations(ks.metadata(), new_log_schema, timestamp);
        auto desc_muts = old_desc_schema
            ? db::schema_tables::make_update_table_mutations(ks.metadata(), old_desc_schema, new_desc_schema, timestamp, false)
            : db::schema_tables::make_create_table_mutations(ks.metadata(), new_desc_schema, timestamp);

        move_append(mutations, log_muts);
        move_append(mutations, desc_muts);
    }

    // When a CDC-enabled table is dropped, appends the "drop table"
    // mutations for its log table and stream description table.
    void on_before_drop_column_family(const schema& schema, std::vector<mutation>& mutations, api::timestamp_type timestamp) override {
        if (!schema.cdc_options().enabled()) {
            return;
        }
        auto log_table_name = log_name(schema.cf_name());
        auto desc_table_name = desc_name(schema.cf_name());
        auto& db = _ctxt._proxy.get_db().local();
        auto& ks = db.find_keyspace(schema.ks_name());
        auto old_log_schema = db.find_column_family(schema.ks_name(), log_table_name).schema();
        auto old_desc_schema = db.find_column_family(schema.ks_name(), desc_table_name).schema();

        auto log_muts = db::schema_tables::make_drop_table_mutations(ks.metadata(), old_log_schema, timestamp);
        auto desc_muts = db::schema_tables::make_drop_table_mutations(ks.metadata(), old_desc_schema, timestamp);

        move_append(mutations, log_muts);
        move_append(mutations, desc_muts);
    }

    // Fills the stream description table of a CDC-enabled table.
    void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {
        // This callback is done on all shards. Only do the work once.
        if (engine().cpu_id() != 0) {
            return;
        }
        auto& db = _ctxt._proxy.get_db().local();
        auto base_schema = db.find_column_family(ks_name, cf_name).schema();
        if (!base_schema->cdc_options().enabled()) {
            return;
        }
        // NOTE(review): get() waits for the future here; presumably this
        // callback is expected to run inside a seastar thread - confirm.
        populate_desc(_ctxt, *base_schema).get();
    }

    // An update is handled exactly like a creation; `columns_changed` is
    // not consulted.
    void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) override {
        on_create_column_family(ks_name, cf_name);
    }

    // Nothing to do once the table is gone.
    void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {}

    future<std::tuple<std::vector<mutation>, result_callback>> augment_mutation_call(
        lowres_clock::time_point timeout,
        std::vector<mutation>&& mutations
    );

    template<typename Iter>
    future<> append_mutations(Iter i, Iter e, schema_ptr s, lowres_clock::time_point, std::vector<mutation>&);
};
|
||||
|
||||
// Convenience constructor: delegates to the db_context constructor with a
// context produced by db_context::builder from `proxy` alone.
cdc::cdc_service::cdc_service(service::storage_proxy& proxy)
    : cdc_service(db_context::builder(proxy).build()) {
}
|
||||
|
||||
// Creates the implementation object from `ctxt` and then hands this service
// to the storage proxy referenced by that context.
cdc::cdc_service::cdc_service(db_context ctxt)
    : _impl(std::make_unique<impl>(std::move(ctxt))) {
    auto& proxy = _impl->_ctxt._proxy;
    proxy.set_cdc_service(this);
}
|
||||
|
||||
future<> cdc::cdc_service::stop() {
|
||||
return _impl->stop();
|
||||
}
|
||||
|
||||
cdc::cdc_service::~cdc_service() = default;
|
||||
|
||||
// Builds the CDC options from a table's option map.
//
// When the map has no "enabled" key nothing is parsed and every field keeps
// its default value. Otherwise each entry must be one of "enabled",
// "preimage", "postimage" (set to true only for the exact text "true") or
// "ttl" (a non-negative decimal integer).
//
// Throws exceptions::configuration_exception for an unknown key and for a
// "ttl" value that is not a non-negative integer that fits in an int. The
// std::stoi failures are translated so that a malformed TTL is reported the
// same way as every other invalid option, instead of escaping as
// std::invalid_argument / std::out_of_range.
cdc::options::options(const std::map<sstring, sstring>& map) {
    if (map.find("enabled") == std::end(map)) {
        return;
    }

    for (auto& p : map) {
        if (p.first == "enabled") {
            _enabled = p.second == "true";
        } else if (p.first == "preimage") {
            _preimage = p.second == "true";
        } else if (p.first == "postimage") {
            _postimage = p.second == "true";
        } else if (p.first == "ttl") {
            int ttl = 0;
            std::size_t consumed = 0;
            try {
                ttl = std::stoi(p.second, &consumed);
            } catch (const std::logic_error&) {
                // std::invalid_argument (not a number) and std::out_of_range
                // (does not fit in an int) both derive from std::logic_error.
                throw exceptions::configuration_exception("Invalid value for CDC option \"ttl\": " + p.second);
            }
            // Reject trailing garbage such as "10abc" and negative values.
            if (consumed != p.second.size() || ttl < 0) {
                throw exceptions::configuration_exception("Invalid value for CDC option \"ttl\": " + p.second);
            }
            _ttl = ttl;
        } else {
            throw exceptions::configuration_exception("Invalid CDC option: " + p.first);
        }
    }
}
|
||||
|
||||
std::map<sstring, sstring> cdc::options::to_map() const {
|
||||
if (!_enabled) {
|
||||
return {};
|
||||
}
|
||||
return {
|
||||
{ "enabled", _enabled ? "true" : "false" },
|
||||
{ "preimage", _preimage ? "true" : "false" },
|
||||
{ "postimage", _postimage ? "true" : "false" },
|
||||
{ "ttl", std::to_string(_ttl) },
|
||||
};
|
||||
}
|
||||
|
||||
// Renders the options as the JSON form of the map produced by to_map().
sstring cdc::options::to_sstring() const {
    const auto as_map = to_map();
    return json::to_json(as_map);
}
|
||||
|
||||
// Two option sets are equal when all four settings match.
bool cdc::options::operator==(const options& o) const {
    if (_enabled != o._enabled) {
        return false;
    }
    if (_preimage != o._preimage) {
        return false;
    }
    if (_postimage != o._postimage) {
        return false;
    }
    return _ttl == o._ttl;
}
|
||||
// Negation of operator==.
bool cdc::options::operator!=(const options& o) const {
    const bool same = (*this == o);
    return !same;
}
|
||||
|
||||
namespace cdc {
|
||||
|
||||
using operation_native_type = std::underlying_type_t<operation>;
|
||||
using column_op_native_type = std::underlying_type_t<column_op>;
|
||||
|
||||
// Name of the CDC log table that belongs to base table `table_name`:
// the base name followed by "_scylla_cdc_log".
sstring log_name(const sstring& table_name) {
    return table_name + sstring("_scylla_cdc_log");
}
|
||||
|
||||
// Name of the CDC stream description table that belongs to base table
// `table_name`: the base name followed by "_scylla_cdc_desc".
sstring desc_name(const sstring& table_name) {
    return table_name + sstring("_scylla_cdc_desc");
}
|
||||
|
||||
// Builds the schema of the CDC log table for base table `s`.
//
// The log table is keyed by (stream_id) / (time, batch_seq_no) and carries
// an "operation" and a "ttl" column. Every base column is mirrored under
// its own name prefixed with "_": key columns keep their type, static and
// regular columns are wrapped in a tuple of (column op, value, ttl).
//
// When `uuid` is set it is used as the id of the built schema.
static schema_ptr create_log_schema(const schema& s, std::optional<utils::UUID> uuid) {
    schema_builder builder(s.ks_name(), log_name(s.cf_name()));
    builder.set_comment(sprint("CDC log for %s.%s", s.ks_name(), s.cf_name()));

    // Fixed columns of every log table.
    builder.with_column("stream_id", uuid_type, column_kind::partition_key);
    builder.with_column("time", timeuuid_type, column_kind::clustering_key);
    builder.with_column("batch_seq_no", int32_type, column_kind::clustering_key);
    builder.with_column("operation", data_type_for<operation_native_type>());
    builder.with_column("ttl", long_type);

    // Mirrors a range of base columns into the log schema.
    const auto mirror_columns = [&builder] (const schema::const_iterator_range_type& base_columns, bool wrap_in_tuple) {
        for (const auto& base_column : base_columns) {
            auto log_type = base_column.type;
            if (wrap_in_tuple) {
                log_type = tuple_type_impl::get_instance({
                    /* op */ data_type_for<column_op_native_type>(),
                    /* value */ log_type,
                    /* ttl */ long_type});
            }
            builder.with_column("_" + base_column.name(), log_type);
        }
    };
    mirror_columns(s.partition_key_columns(), false);
    mirror_columns(s.clustering_key_columns(), false);
    mirror_columns(s.static_columns(), true);
    mirror_columns(s.regular_columns(), true);

    if (uuid.has_value()) {
        builder.set_uuid(*uuid);
    }

    return builder.build();
}
|
||||
|
||||
// Builds the schema of the CDC stream description table for base table `s`.
//
// Rows are keyed by (node_ip, shard_id) / (created_at) and hold a single
// regular column, "stream_id". When `uuid` is set it is used as the id of
// the built schema.
static schema_ptr create_stream_description_table_schema(const schema& s, std::optional<utils::UUID> uuid) {
    schema_builder builder(s.ks_name(), desc_name(s.cf_name()));
    builder.set_comment(sprint("CDC description for %s.%s", s.ks_name(), s.cf_name()));

    // Partition key.
    builder.with_column("node_ip", inet_addr_type, column_kind::partition_key);
    builder.with_column("shard_id", int32_type, column_kind::partition_key);
    // Clustering key.
    builder.with_column("created_at", timestamp_type, column_kind::clustering_key);
    // Payload.
    builder.with_column("stream_id", uuid_type);

    if (uuid.has_value()) {
        builder.set_uuid(*uuid);
    }

    return builder.build();
}
|
||||
|
||||
// This function assumes setup_stream_description_table was called on |s| before the call to this
|
||||
// function.
|
||||
static future<> populate_desc(db_context ctx, const schema& s) {
|
||||
auto& db = ctx._proxy.get_db().local();
|
||||
auto desc_schema =
|
||||
db.find_schema(s.ks_name(), desc_name(s.cf_name()));
|
||||
auto log_schema =
|
||||
db.find_schema(s.ks_name(), log_name(s.cf_name()));
|
||||
auto belongs_to = [&](const gms::inet_address& endpoint,
|
||||
const unsigned int shard_id,
|
||||
const int shard_count,
|
||||
const unsigned int ignore_msb_bits,
|
||||
const utils::UUID& stream_id) {
|
||||
const auto log_pk = partition_key::from_singular(*log_schema,
|
||||
data_value(stream_id));
|
||||
const auto token = ctx._partitioner.decorate_key(*log_schema, log_pk).token();
|
||||
if (ctx._token_metadata.get_endpoint(ctx._token_metadata.first_token(token)) != endpoint) {
|
||||
return false;
|
||||
}
|
||||
const auto owning_shard_id = dht::murmur3_partitioner(shard_count, ignore_msb_bits).shard_of(token);
|
||||
return owning_shard_id == shard_id;
|
||||
};
|
||||
|
||||
std::vector<mutation> mutations;
|
||||
const auto ts = api::new_timestamp();
|
||||
const auto ck = clustering_key::from_single_value(
|
||||
*desc_schema, timestamp_type->decompose(ts));
|
||||
auto cdef = desc_schema->get_column_definition(to_bytes("stream_id"));
|
||||
|
||||
for (const auto& dc : ctx._token_metadata.get_topology().get_datacenter_endpoints()) {
|
||||
for (const auto& endpoint : dc.second) {
|
||||
const auto decomposed_ip = inet_addr_type->decompose(endpoint.addr());
|
||||
const unsigned int shard_count = ctx._snitch->get_shard_count(endpoint);
|
||||
const unsigned int ignore_msb_bits = ctx._snitch->get_ignore_msb_bits(endpoint);
|
||||
for (unsigned int shard_id = 0; shard_id < shard_count; ++shard_id) {
|
||||
const auto pk = partition_key::from_exploded(
|
||||
*desc_schema, { decomposed_ip, int32_type->decompose(static_cast<int>(shard_id)) });
|
||||
mutations.emplace_back(desc_schema, pk);
|
||||
|
||||
auto stream_id = utils::make_random_uuid();
|
||||
while (!belongs_to(endpoint, shard_id, shard_count, ignore_msb_bits, stream_id)) {
|
||||
stream_id = utils::make_random_uuid();
|
||||
}
|
||||
auto value = atomic_cell::make_live(*uuid_type,
|
||||
ts,
|
||||
uuid_type->decompose(stream_id));
|
||||
mutations.back().set_cell(ck, *cdef, std::move(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
return ctx._proxy.mutate(std::move(mutations),
|
||||
db::consistency_level::QUORUM,
|
||||
db::no_timeout,
|
||||
nullptr,
|
||||
empty_service_permit());
|
||||
}
|
||||
|
||||
// Starts a builder that only knows the storage proxy; the remaining
// dependencies can be supplied through the with_*() setters.
db_context::builder::builder(service::storage_proxy& proxy)
    : _proxy(proxy) {
}
|
||||
|
||||
// Overrides the migration notifier used by the built context.
// Returns the builder itself so that calls can be chained.
db_context::builder& db_context::builder::with_migration_notifier(service::migration_notifier& migration_notifier) {
    builder& self = *this;
    self._migration_notifier = migration_notifier;
    return self;
}
|
||||
|
||||
// Overrides the token metadata used by the built context.
// Returns the builder itself so that calls can be chained.
db_context::builder& db_context::builder::with_token_metadata(locator::token_metadata& token_metadata) {
    builder& self = *this;
    self._token_metadata = token_metadata;
    return self;
}
|
||||
|
||||
// Overrides the snitch used by the built context.
// Returns the builder itself so that calls can be chained.
db_context::builder& db_context::builder::with_snitch(locator::snitch_ptr& snitch) {
    builder& self = *this;
    self._snitch = snitch;
    return self;
}
|
||||
|
||||
// Overrides the partitioner used by the built context.
// Returns the builder itself so that calls can be chained.
db_context::builder& db_context::builder::with_partitioner(dht::i_partitioner& partitioner) {
    builder& self = *this;
    self._partitioner = partitioner;
    return self;
}
|
||||
|
||||
// Assembles the db_context. Every dependency that was not supplied through
// a with_*() setter falls back to the corresponding global: the local
// storage service (migration notifier, token metadata), the local snitch
// pointer and the global partitioner.
db_context db_context::builder::build() {
    auto&& migration_notifier = _migration_notifier
        ? _migration_notifier->get()
        : service::get_local_storage_service().get_migration_notifier();
    auto&& token_metadata = _token_metadata
        ? _token_metadata->get()
        : service::get_local_storage_service().get_token_metadata();
    auto&& snitch = _snitch
        ? _snitch->get()
        : locator::i_endpoint_snitch::get_local_snitch_ptr();
    auto&& partitioner = _partitioner
        ? _partitioner->get()
        : dht::global_partitioner();

    return db_context{
        _proxy,
        migration_notifier,
        token_metadata,
        snitch,
        partitioner
    };
}
|
||||
|
||||
class transformer final {
|
||||
public:
|
||||
using streams_type = std::unordered_map<std::pair<net::inet_address, unsigned int>, utils::UUID>;
|
||||
private:
|
||||
db_context _ctx;
|
||||
schema_ptr _schema;
|
||||
schema_ptr _log_schema;
|
||||
utils::UUID _time;
|
||||
bytes _decomposed_time;
|
||||
::shared_ptr<const transformer::streams_type> _streams;
|
||||
const column_definition& _op_col;
|
||||
ttl_opt _cdc_ttl_opt;
|
||||
|
||||
clustering_key set_pk_columns(const partition_key& pk, int batch_no, mutation& m) const {
|
||||
const auto log_ck = clustering_key::from_exploded(
|
||||
*m.schema(), { _decomposed_time, int32_type->decompose(batch_no) });
|
||||
auto pk_value = pk.explode(*_schema);
|
||||
size_t pos = 0;
|
||||
for (const auto& column : _schema->partition_key_columns()) {
|
||||
assert (pos < pk_value.size());
|
||||
auto cdef = m.schema()->get_column_definition(to_bytes("_" + column.name()));
|
||||
auto value = atomic_cell::make_live(*column.type,
|
||||
_time.timestamp(),
|
||||
bytes_view(pk_value[pos]),
|
||||
_cdc_ttl_opt);
|
||||
m.set_cell(log_ck, *cdef, std::move(value));
|
||||
++pos;
|
||||
}
|
||||
return log_ck;
|
||||
}
|
||||
|
||||
void set_operation(const clustering_key& ck, operation op, mutation& m) const {
|
||||
m.set_cell(ck, _op_col, atomic_cell::make_live(*_op_col.type, _time.timestamp(), _op_col.type->decompose(operation_native_type(op)), _cdc_ttl_opt));
|
||||
}
|
||||
|
||||
partition_key stream_id(const net::inet_address& ip, unsigned int shard_id) const {
|
||||
auto it = _streams->find(std::make_pair(ip, shard_id));
|
||||
if (it == std::end(*_streams)) {
|
||||
throw std::runtime_error(format("No stream found for node {} and shard {}", ip, shard_id));
|
||||
}
|
||||
return partition_key::from_exploded(*_log_schema, { uuid_type->decompose(it->second) });
|
||||
}
|
||||
public:
|
||||
transformer(db_context ctx, schema_ptr s, ::shared_ptr<const transformer::streams_type> streams)
|
||||
: _ctx(ctx)
|
||||
, _schema(std::move(s))
|
||||
, _log_schema(ctx._proxy.get_db().local().find_schema(_schema->ks_name(), log_name(_schema->cf_name())))
|
||||
, _time(utils::UUID_gen::get_time_UUID())
|
||||
, _decomposed_time(timeuuid_type->decompose(_time))
|
||||
, _streams(std::move(streams))
|
||||
, _op_col(*_log_schema->get_column_definition(to_bytes("operation")))
|
||||
{
|
||||
if (_schema->cdc_options().ttl()) {
|
||||
_cdc_ttl_opt = std::chrono::seconds(_schema->cdc_options().ttl());
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: is pre-image data based on query enough. We only have actual column data. Do we need
|
||||
// more details like tombstones/ttl? Probably not but keep in mind.
|
||||
// Builds the CDC log mutation describing the change carried by `m`.
//
// `rs`, when non-null, is the pre-image query result; rows in it whose
// clustering key matches a row of `m` produce an extra pre_image log row.
// Throws std::runtime_error when no endpoint is found for the mutation's token.
mutation transform(const mutation& m, const cql3::untyped_result_set* rs = nullptr) const {
    // Resolve the endpoint for the token, then the shard on that endpoint;
    // both are fed to stream_id() to form the key of the log mutation.
    const auto& tok = m.token();
    auto&& owner = _ctx._token_metadata.get_endpoint(
            _ctx._token_metadata.first_token(tok));
    if (!owner) {
        throw std::runtime_error(format("No owner found for key {}", m.decorated_key()));
    }
    auto shard_id = dht::murmur3_partitioner(
            _ctx._snitch->get_shard_count(*owner),
            _ctx._snitch->get_ignore_msb_bits(*owner)).shard_of(tok);

    mutation res(_log_schema, stream_id(owner->addr(), shard_id));
    const auto& part = m.partition();

    if (part.partition_tombstone()) {
        // Partition deletion: a single log row.
        auto log_ck = set_pk_columns(m.key(), 0, res);
        set_operation(log_ck, operation::partition_delete, res);
        return res;
    }

    if (!part.row_tombstones().empty()) {
        // Range deletion: one log row for the start bound and one for the end
        // bound of every range tombstone.

        // Copies the clustering components present in the bound prefix `ckp`
        // into the "_<column>" cells of the log row `log_ck`.
        auto set_bound = [&] (const clustering_key& log_ck, const clustering_key_prefix& ckp) {
            auto components = ckp.explode(*_schema);
            size_t idx = 0;
            for (const auto& column : _schema->clustering_key_columns()) {
                if (idx >= components.size()) {
                    break; // the prefix is shorter than the full clustering key
                }
                auto cdef = _log_schema->get_column_definition(to_bytes("_" + column.name()));
                auto value = atomic_cell::make_live(*column.type,
                        _time.timestamp(),
                        bytes_view(components[idx]),
                        _cdc_ttl_opt);
                res.set_cell(log_ck, *cdef, std::move(value));
                ++idx;
            }
        };

        int batch_no = 0;
        for (auto& rt : part.row_tombstones()) {
            auto start_ck = set_pk_columns(m.key(), batch_no, res);
            set_bound(start_ck, rt.start);
            // TODO: separate inclusive/exclusive range
            set_operation(start_ck, operation::range_delete_start, res);
            ++batch_no;

            auto end_ck = set_pk_columns(m.key(), batch_no, res);
            set_bound(end_ck, rt.end);
            // TODO: separate inclusive/exclusive range
            set_operation(end_ck, operation::range_delete_end, res);
            ++batch_no;
        }
        return res;
    }

    // should be update or deletion
    int batch_no = 0;
    for (const rows_entry& r : part.clustered_rows()) {
        auto ck_value = r.key().explode(*_schema);

        std::optional<clustering_key> pikey;
        const cql3::untyped_result_set_row* pirow = nullptr;

        if (rs) {
            // True when every clustering column of `candidate` equals the
            // corresponding component of the row being logged.
            auto same_clustering_key = [&] (const auto& candidate) {
                for (auto& c : _schema->clustering_key_columns()) {
                    auto rv = candidate.get_view(c.name_as_text());
                    auto cv = r.key().get_component(*_schema, c.component_index());
                    if (rv != cv) {
                        return false;
                    }
                }
                return true;
            };

            for (auto& utr : *rs) {
                if (!same_clustering_key(utr)) {
                    continue;
                }
                // The first matching pre-image row gets its own log row,
                // emitted before the update row.
                pikey = set_pk_columns(m.key(), batch_no, res);
                set_operation(*pikey, operation::pre_image, res);
                pirow = &utr;
                ++batch_no;
                break;
            }
        }

        auto log_ck = set_pk_columns(m.key(), batch_no, res);

        // Clustering key columns go into both the update row and, when
        // present, the pre-image row.
        size_t pos = 0;
        for (const auto& column : _schema->clustering_key_columns()) {
            assert (pos < ck_value.size());
            auto cdef = _log_schema->get_column_definition(to_bytes("_" + column.name()));
            res.set_cell(log_ck, *cdef, atomic_cell::make_live(*column.type, _time.timestamp(), bytes_view(ck_value[pos]), _cdc_ttl_opt));

            if (pirow) {
                assert(pirow->has(column.name_as_text()));
                res.set_cell(*pikey, *cdef, atomic_cell::make_live(*column.type, _time.timestamp(), bytes_view(ck_value[pos]), _cdc_ttl_opt));
            }

            ++pos;
        }

        // Scratch tuple written to each log cell: [0] column_op, [1] value, [2] ttl.
        std::vector<bytes_opt> values(3);

        auto process_cells = [&](const row& cells, column_kind ckind) {
            cells.for_each_cell([&](column_id id, const atomic_cell_or_collection& cell) {
                auto& cdef = _schema->column_at(ckind, id);
                auto* dst = _log_schema->get_column_definition(to_bytes("_" + cdef.name()));
                // todo: collections.
                if (!cdef.is_atomic()) {
                    cdc_log.warn("Non-atomic cell ignored {}.{}:{}", _schema->ks_name(), _schema->cf_name(), cdef.name_as_text());
                    return;
                }

                values[1] = values[2] = std::nullopt;
                auto view = cell.as_atomic_cell(cdef);
                const bool live = view.is_live();
                const column_op op = live ? column_op::set : column_op::del;
                if (live) {
                    values[1] = view.value().linearize();
                    if (view.is_live_and_has_ttl()) {
                        values[2] = long_type->decompose(data_value(view.ttl().count()));
                    }
                }

                values[0] = data_type_for<column_op_native_type>()->decompose(data_value(static_cast<column_op_native_type>(op)));
                res.set_cell(log_ck, *dst, atomic_cell::make_live(*dst->type, _time.timestamp(), tuple_type_impl::build_value(values), _cdc_ttl_opt));

                if (pirow && pirow->has(cdef.name_as_text())) {
                    // Pre-image cell: always a "set" of the previously stored value, no ttl.
                    values[0] = data_type_for<column_op_native_type>()->decompose(data_value(static_cast<column_op_native_type>(column_op::set)));
                    values[1] = pirow->get_blob(cdef.name_as_text());
                    values[2] = std::nullopt;

                    // The pre-image row and the update row must be distinct log rows.
                    assert(std::addressof(res.partition().clustered_row(*_log_schema, *pikey)) != std::addressof(res.partition().clustered_row(*_log_schema, log_ck)));
                    assert(pikey->explode() != log_ck.explode());
                    res.set_cell(*pikey, *dst, atomic_cell::make_live(*dst->type, _time.timestamp(), tuple_type_impl::build_value(values), _cdc_ttl_opt));
                }
            });
        };

        process_cells(r.row().cells(), column_kind::regular_column);
        process_cells(part.static_row().get(), column_kind::static_column);

        set_operation(log_ck, operation::update, res);
        ++batch_no;
    }

    return res;
}
|
||||
|
||||
// Returns a deadline ten seconds after the current db::timeout_clock time.
static db::timeout_clock::time_point default_timeout() {
    const auto now = db::timeout_clock::now();
    return now + 10s;
}
|
||||
|
||||
// Reads the current values of the rows and columns touched by `m`.
//
// Resolves to a null pointer when `m` is a partition or range deletion, has
// no clustered rows, or the query returns nothing; otherwise to the result
// set of a read of the touched rows at consistency level `cl`.
future<lw_shared_ptr<cql3::untyped_result_set>> pre_image_select(
        service::client_state& client_state,
        db::consistency_level cl,
        const mutation& m)
{
    const auto& part = m.partition();
    if (part.partition_tombstone() || !part.row_tombstones().empty() || part.clustered_rows().empty()) {
        return make_ready_future<lw_shared_ptr<cql3::untyped_result_set>>();
    }

    dht::partition_range_vector partition_ranges{dht::partition_range(m.decorated_key())};

    auto&& pk_columns = _schema->partition_key_columns();
    auto&& ck_columns = _schema->clustering_key_columns();

    // One singular range per modified row, or the whole partition when the
    // table has no clustering key.
    std::vector<query::clustering_range> bounds;
    if (ck_columns.empty()) {
        bounds.push_back(query::clustering_range::make_open_ended_both_sides());
    } else {
        for (const rows_entry& entry : part.clustered_rows()) {
            bounds.push_back(query::clustering_range::make_singular(entry.key()));
        }
    }

    // Selected columns: full primary key first, then every touched column.
    std::vector<const column_definition*> columns;
    columns.reserve(_schema->all_columns().size());
    for (auto& c : pk_columns) {
        columns.push_back(&c);
    }
    for (auto& c : ck_columns) {
        columns.push_back(&c);
    }

    query::column_id_vector static_columns, regular_columns;

    // Records the ids of the cells present in `cells` into `cids` and adds
    // their definitions to the selection.
    auto collect = [&] (const row& cells, query::column_id_vector& cids, column_kind kind) {
        cells.for_each_cell([&](column_id id, const atomic_cell_or_collection&) {
            auto& cdef = _schema->column_at(kind, id);
            cids.emplace_back(id);
            columns.emplace_back(&cdef);
        });
    };
    // TODO: this assumes all mutations touch the same set of columns. This might not be true, and we may need to do more horrible set operation here.
    collect(part.static_row().get(), static_columns, column_kind::static_column);
    collect(part.clustered_rows().begin()->row().cells(), regular_columns, column_kind::regular_column);

    auto selection = cql3::selection::selection::for_columns(_schema, std::move(columns));
    auto partition_slice = query::partition_slice(std::move(bounds), std::move(static_columns), std::move(regular_columns), selection->get_query_options());
    auto command = ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(), partition_slice, query::max_partitions);

    auto query_options = service::storage_proxy::coordinator_query_options(default_timeout(), empty_service_permit(), client_state);
    return _ctx._proxy.query(_schema, std::move(command), std::move(partition_ranges), cl, std::move(query_options)).then(
            [s = _schema, partition_slice = std::move(partition_slice), selection = std::move(selection)] (service::storage_proxy::coordinator_query_result qr) -> lw_shared_ptr<cql3::untyped_result_set> {
        cql3::selection::result_set_builder builder(*selection, gc_clock::now(), cql_serialization_format::latest());
        query::result_view::consume(*qr.query_result, partition_slice, cql3::selection::result_set_builder::visitor(builder, *s, *selection));
        auto result_set = builder.build();
        if (!result_set || result_set->empty()) {
            return {};
        }
        return make_lw_shared<cql3::untyped_result_set>(*result_set);
    });
}
|
||||
};
|
||||
|
||||
// This class is used to build a mapping from <node ip, shard id> to stream_id
|
||||
// It is used as a consumer for rows returned by the query to CDC Description Table
|
||||
class streams_builder {
|
||||
const schema& _schema;
|
||||
transformer::streams_type _streams;
|
||||
net::inet_address _node_ip = net::inet_address();
|
||||
unsigned int _shard_id = 0;
|
||||
api::timestamp_type _latest_row_timestamp = api::min_timestamp;
|
||||
utils::UUID _latest_row_stream_id = utils::UUID();
|
||||
public:
|
||||
streams_builder(const schema& s) : _schema(s) {}
|
||||
|
||||
void accept_new_partition(const partition_key& key, uint32_t row_count) {
|
||||
auto exploded = key.explode(_schema);
|
||||
_node_ip = value_cast<net::inet_address>(inet_addr_type->deserialize(exploded[0]));
|
||||
_shard_id = static_cast<unsigned int>(value_cast<int>(int32_type->deserialize(exploded[1])));
|
||||
_latest_row_timestamp = api::min_timestamp;
|
||||
_latest_row_stream_id = utils::UUID();
|
||||
}
|
||||
|
||||
void accept_new_partition(uint32_t row_count) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
void accept_new_row(
|
||||
const clustering_key& key,
|
||||
const query::result_row_view& static_row,
|
||||
const query::result_row_view& row) {
|
||||
auto row_iterator = row.iterator();
|
||||
api::timestamp_type timestamp = value_cast<db_clock::time_point>(
|
||||
timestamp_type->deserialize(key.explode(_schema)[0])).time_since_epoch().count();
|
||||
if (timestamp <= _latest_row_timestamp) {
|
||||
return;
|
||||
}
|
||||
_latest_row_timestamp = timestamp;
|
||||
for (auto&& cdef : _schema.regular_columns()) {
|
||||
if (cdef.name_as_text() != "stream_id") {
|
||||
row_iterator.skip(cdef);
|
||||
continue;
|
||||
}
|
||||
auto val_opt = row_iterator.next_atomic_cell();
|
||||
assert(val_opt);
|
||||
val_opt->value().with_linearized([&] (bytes_view bv) {
|
||||
_latest_row_stream_id = value_cast<utils::UUID>(uuid_type->deserialize(bv));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void accept_new_row(const query::result_row_view& static_row, const query::result_row_view& row) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
void accept_partition_end(const query::result_row_view& static_row) {
|
||||
_streams.emplace(std::make_pair(_node_ip, _shard_id), _latest_row_stream_id);
|
||||
}
|
||||
|
||||
transformer::streams_type build() {
|
||||
return std::move(_streams);
|
||||
}
|
||||
};
|
||||
|
||||
// Queries the CDC description table of ks_name.cf_name (all partitions, at
// QUORUM) and folds the result into a streams_type mapping via streams_builder.
static future<::shared_ptr<transformer::streams_type>> get_streams(
        db_context ctx,
        const sstring& ks_name,
        const sstring& cf_name,
        lowres_clock::time_point timeout,
        service::query_state& qs) {
    auto desc_schema = ctx._proxy.get_db().local().find_schema(ks_name, desc_name(cf_name));

    query::read_command cmd(
            desc_schema->id(),
            desc_schema->version(),
            partition_slice_builder(*desc_schema).with_no_static_columns().build());

    auto pending = ctx._proxy.query(
            desc_schema,
            make_lw_shared(std::move(cmd)),
            {dht::partition_range::make_open_ended_both_sides()},
            db::consistency_level::QUORUM,
            {timeout, qs.get_permit(), qs.get_client_state()});

    return pending.then([s = std::move(desc_schema)] (auto qr) mutable {
        return query::result_view::do_with(*qr.query_result,
                [s = std::move(s)] (query::result_view v) {
            // The same slice shape as the one used for the read command.
            auto slice = partition_slice_builder(*s)
                    .with_no_static_columns()
                    .build();
            streams_builder builder{ *s };
            v.consume(slice, builder);
            return ::make_shared<transformer::streams_type>(builder.build());
        });
    });
}
|
||||
|
||||
template <typename Func>
|
||||
future<std::vector<mutation>>
|
||||
transform_mutations(std::vector<mutation>& muts, decltype(muts.size()) batch_size, Func&& f) {
|
||||
return parallel_for_each(
|
||||
boost::irange(static_cast<decltype(muts.size())>(0), muts.size(), batch_size),
|
||||
std::move(f))
|
||||
.then([&muts] () mutable { return std::move(muts); });
|
||||
}
|
||||
|
||||
} // namespace cdc
|
||||
|
||||
// Appends one CDC log mutation for every CDC-enabled mutation in `mutations`
// and resolves to the augmented vector together with an empty result_callback.
// When no mutation belongs to a CDC-enabled table the input is returned as is.
future<std::tuple<std::vector<mutation>, cdc::result_callback>>
cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations) {
    // we do all this because in the case of batches, we can have mixed schemas.
    const bool any_cdc_enabled = std::any_of(mutations.begin(), mutations.end(), [](const mutation& m) {
        return m.schema()->cdc_options().enabled();
    });

    if (!any_cdc_enabled) {
        return make_ready_future<std::tuple<std::vector<mutation>, cdc::result_callback>>(std::make_tuple(std::move(mutations), result_callback{}));
    }

    // Room for one log mutation per input mutation, so the appends below do
    // not need to reallocate.
    mutations.reserve(2 * mutations.size());

    // No iterator into `mutations` may be carried past this point: the vector
    // has just been reserved and is about to be moved into do_with.
    return do_with(std::move(mutations), service::query_state(service::client_state::for_internal_calls(), empty_service_permit()), [this, timeout](std::vector<mutation>& mutations, service::query_state& qs) {
        return transform_mutations(mutations, 1, [this, &mutations, timeout, &qs] (size_t idx) {
            auto s = mutations[idx].schema();

            if (!s->cdc_options().enabled()) {
                return make_ready_future<>();
            }
            // for batches/multiple mutations this is super inefficient. either partition the mutation set by schema
            // and re-use streams, or probably better: add a cache so this lookup is a noop on second mutation
            return get_streams(_ctxt, s->ks_name(), s->cf_name(), timeout, qs).then([this, s = std::move(s), &qs, &mutations, idx](::shared_ptr<transformer::streams_type> streams) mutable {
                auto& m = mutations[idx]; // should not really need because of reserve, but lets be conservative
                transformer trans(_ctxt, s, streams);

                if (!s->cdc_options().preimage()) {
                    mutations.emplace_back(trans.transform(m));
                    return make_ready_future<>();
                }

                // Note: further improvement here would be to coalesce the pre-image selects into one
                // iff a batch contains several modifications to the same table. Otoh, batch is rare(?)
                // so this is premature.
                auto f = trans.pre_image_select(qs.get_client_state(), db::consistency_level::LOCAL_QUORUM, m);
                return f.then([trans = std::move(trans), &mutations, idx] (lw_shared_ptr<cql3::untyped_result_set> rs) mutable {
                    // Re-index instead of holding a reference across the asynchronous read.
                    mutations.push_back(trans.transform(mutations[idx], rs.get()));
                });
            });
        }).then([](std::vector<mutation> mutations) {
            return make_ready_future<std::tuple<std::vector<mutation>, cdc::result_callback>>(std::make_tuple(std::move(mutations), result_callback{}));
        });
    });
}
|
||||
|
||||
bool cdc::cdc_service::needs_cdc_augmentation(const std::vector<mutation>& mutations) const {
|
||||
return std::any_of(mutations.begin(), mutations.end(), [](const mutation& m) {
|
||||
return m.schema()->cdc_options().enabled();
|
||||
});
|
||||
}
|
||||
|
||||
// Public entry point; forwards to the private implementation object.
future<std::tuple<std::vector<mutation>, cdc::result_callback>>
cdc::cdc_service::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations) {
    auto& delegate = *_impl;
    return delegate.augment_mutation_call(timeout, std::move(mutations));
}
|
||||
142
cdc/cdc.hh
Normal file
142
cdc/cdc.hh
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/lowres_clock.hh>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "timestamp.hh"
|
||||
#include "cdc_options.hh"
|
||||
|
||||
class schema;
|
||||
using schema_ptr = seastar::lw_shared_ptr<const schema>;
|
||||
|
||||
namespace locator {
|
||||
|
||||
class snitch_ptr;
|
||||
class token_metadata;
|
||||
|
||||
} // namespace locator
|
||||
|
||||
namespace service {
|
||||
|
||||
class migration_notifier;
|
||||
class storage_proxy;
|
||||
class query_state;
|
||||
|
||||
} // namespace service
|
||||
|
||||
namespace dht {
|
||||
|
||||
class i_partitioner;
|
||||
|
||||
} // namespace dht
|
||||
|
||||
class mutation;
|
||||
class partition_key;
|
||||
|
||||
namespace cdc {
|
||||
|
||||
class db_context;
|
||||
|
||||
// Callback to be invoked on mutation finish to fix
|
||||
// the whole bit about post-image.
|
||||
// TODO: decide on what the parameters are to be for this.
|
||||
using result_callback = std::function<future<>()>;
|
||||
|
||||
/// \brief CDC service, responsible for schema listeners
|
||||
///
|
||||
/// CDC service will listen for schema changes and iff CDC is enabled/changed
|
||||
/// create/modify/delete corresponding log tables etc as part of the schema change.
|
||||
///
|
||||
class cdc_service {
|
||||
class impl;
|
||||
std::unique_ptr<impl> _impl;
|
||||
public:
|
||||
future<> stop();
|
||||
cdc_service(service::storage_proxy&);
|
||||
cdc_service(db_context);
|
||||
~cdc_service();
|
||||
|
||||
// If any of the mutations are cdc enabled, optionally selects preimage, and adds the
|
||||
// appropriate augments to set the log entries.
|
||||
// Iff post-image is enabled for any of these, a non-empty callback is also
|
||||
// returned to be invoked post the mutation query.
|
||||
future<std::tuple<std::vector<mutation>, result_callback>> augment_mutation_call(
|
||||
lowres_clock::time_point timeout,
|
||||
std::vector<mutation>&& mutations
|
||||
);
|
||||
bool needs_cdc_augmentation(const std::vector<mutation>&) const;
|
||||
};
|
||||
|
||||
struct db_context final {
|
||||
service::storage_proxy& _proxy;
|
||||
service::migration_notifier& _migration_notifier;
|
||||
locator::token_metadata& _token_metadata;
|
||||
locator::snitch_ptr& _snitch;
|
||||
dht::i_partitioner& _partitioner;
|
||||
|
||||
class builder final {
|
||||
service::storage_proxy& _proxy;
|
||||
std::optional<std::reference_wrapper<service::migration_notifier>> _migration_notifier;
|
||||
std::optional<std::reference_wrapper<locator::token_metadata>> _token_metadata;
|
||||
std::optional<std::reference_wrapper<locator::snitch_ptr>> _snitch;
|
||||
std::optional<std::reference_wrapper<dht::i_partitioner>> _partitioner;
|
||||
public:
|
||||
builder(service::storage_proxy& proxy);
|
||||
|
||||
builder& with_migration_notifier(service::migration_notifier& migration_notifier);
|
||||
builder& with_token_metadata(locator::token_metadata& token_metadata);
|
||||
builder& with_snitch(locator::snitch_ptr& snitch);
|
||||
builder& with_partitioner(dht::i_partitioner& partitioner);
|
||||
|
||||
db_context build();
|
||||
};
|
||||
};
|
||||
|
||||
// cdc log table operation
|
||||
enum class operation : int8_t {
    // note: these values will eventually be read by a third party, probably not privy to this
    // enum decl, so don't change the constant values (or the datatype).
    pre_image = 0,
    update = 1,
    row_delete = 2,
    range_delete_start = 3,
    range_delete_end = 4,
    partition_delete = 5
};
|
||||
|
||||
// cdc log data column operation
|
||||
enum class column_op : int8_t {
    // same as "operation". Do not edit the values or the underlying type unless you _really_ want to.
    set = 0,
    del = 1,
    add = 2,
};
|
||||
|
||||
seastar::sstring log_name(const seastar::sstring& table_name);
|
||||
|
||||
seastar::sstring desc_name(const seastar::sstring& table_name);
|
||||
|
||||
} // namespace cdc
|
||||
@@ -1,52 +0,0 @@
|
||||
/*
|
||||
* Copyright 2020 ScyllaDB
|
||||
*/
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "serializer.hh"
|
||||
#include "db/extensions.hh"
|
||||
#include "cdc/cdc_options.hh"
|
||||
#include "schema.hh"
|
||||
|
||||
namespace cdc {
|
||||
|
||||
class cdc_extension : public schema_extension {
|
||||
cdc::options _cdc_options;
|
||||
public:
|
||||
static constexpr auto NAME = "cdc";
|
||||
|
||||
cdc_extension() = default;
|
||||
explicit cdc_extension(std::map<sstring, sstring> tags) : _cdc_options(std::move(tags)) {}
|
||||
explicit cdc_extension(const bytes& b) : _cdc_options(cdc_extension::deserialize(b)) {}
|
||||
explicit cdc_extension(const sstring& s) {
|
||||
throw std::logic_error("Cannot create cdc info from string");
|
||||
}
|
||||
bytes serialize() const override {
|
||||
return ser::serialize_to_buffer<bytes>(_cdc_options.to_map());
|
||||
}
|
||||
static std::map<sstring, sstring> deserialize(const bytes_view& buffer) {
|
||||
return ser::deserialize_from_buffer(buffer, boost::type<std::map<sstring, sstring>>());
|
||||
}
|
||||
const options& get_options() const {
|
||||
return _cdc_options;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,405 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/type.hpp>
|
||||
#include <random>
|
||||
#include <unordered_set>
|
||||
#include <seastar/core/sleep.hh>
|
||||
|
||||
#include "keys.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "dht/token-sharding.hh"
|
||||
#include "locator/token_metadata.hh"
|
||||
#include "gms/application_state.hh"
|
||||
#include "gms/inet_address.hh"
|
||||
#include "gms/gossiper.hh"
|
||||
|
||||
#include "cdc/generation.hh"
|
||||
|
||||
extern logging::logger cdc_log;
|
||||
|
||||
// Shard count taken from the endpoint's SHARD_COUNT application state,
// or -1 when that state is not available.
static int get_shard_count(const gms::inet_address& endpoint, const gms::gossiper& g) {
    auto ep_state = g.get_application_state_ptr(endpoint, gms::application_state::SHARD_COUNT);
    if (!ep_state) {
        return -1;
    }
    return std::stoi(ep_state->value);
}
|
||||
|
||||
// Value of the endpoint's IGNORE_MSB_BITS application state,
// or 0 when that state is not available.
static unsigned get_sharding_ignore_msb(const gms::inet_address& endpoint, const gms::gossiper& g) {
    auto ep_state = g.get_application_state_ptr(endpoint, gms::application_state::IGNORE_MSB_BITS);
    if (!ep_state) {
        return 0;
    }
    return std::stoi(ep_state->value);
}
|
||||
|
||||
namespace cdc {
|
||||
|
||||
extern const api::timestamp_clock::duration generation_leeway =
|
||||
std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::seconds(5));
|
||||
|
||||
static void copy_int_to_bytes(int64_t i, size_t offset, bytes& b) {
|
||||
i = net::hton(i);
|
||||
std::copy_n(reinterpret_cast<int8_t*>(&i), sizeof(int64_t), b.begin() + offset);
|
||||
}
|
||||
|
||||
// Packs two 64-bit integers back to back into the 16-byte value.
stream_id::stream_id(int64_t first, int64_t second)
    : _value(bytes::initialized_later(), sizeof(int64_t) * 2)
{
    constexpr size_t second_offset = sizeof(int64_t);
    copy_int_to_bytes(first, 0, _value);
    copy_int_to_bytes(second, second_offset, _value);
}
|
||||
|
||||
// Adopts an already-encoded value.
stream_id::stream_id(bytes b)
    : _value(std::move(b))
{}
|
||||
|
||||
bool stream_id::is_set() const {
|
||||
return !_value.empty();
|
||||
}
|
||||
|
||||
// Equality is equality of the underlying byte values.
bool stream_id::operator==(const stream_id& o) const { return _value == o._value; }
|
||||
|
||||
// Ordering follows the ordering of the underlying byte values.
bool stream_id::operator<(const stream_id& o) const { return _value < o._value; }
|
||||
|
||||
static int64_t bytes_to_int64(const bytes& b, size_t offset) {
|
||||
assert(b.size() >= offset + sizeof(int64_t));
|
||||
int64_t res;
|
||||
std::copy_n(b.begin() + offset, sizeof(int64_t), reinterpret_cast<int8_t *>(&res));
|
||||
return net::ntoh(res);
|
||||
}
|
||||
|
||||
int64_t stream_id::first() const {
|
||||
return bytes_to_int64(_value, 0);
|
||||
}
|
||||
|
||||
int64_t stream_id::second() const {
|
||||
return bytes_to_int64(_value, sizeof(int64_t));
|
||||
}
|
||||
|
||||
// Read-only access to the underlying byte value.
const bytes& stream_id::to_bytes() const { return _value; }
|
||||
|
||||
// Wraps the byte value into a single-component partition key for `log_schema`.
partition_key stream_id::to_partition_key(const schema& log_schema) const {
    auto key = partition_key::from_single_value(log_schema, _value);
    return key;
}
|
||||
|
||||
// Two descriptions are equal when range end, streams and ignore-msb setting all match.
bool token_range_description::operator==(const token_range_description& o) const {
    if (!(token_range_end == o.token_range_end)) {
        return false;
    }
    if (!(streams == o.streams)) {
        return false;
    }
    return sharding_ignore_msb == o.sharding_ignore_msb;
}
|
||||
|
||||
// Takes ownership of the given entries.
topology_description::topology_description(std::vector<token_range_description> entries)
    : _entries(std::move(entries))
{}
|
||||
|
||||
// Equality is element-wise equality of the entries.
bool topology_description::operator==(const topology_description& o) const { return _entries == o._entries; }
|
||||
|
||||
const std::vector<token_range_description>& topology_description::entries() const {
|
||||
return _entries;
|
||||
}
|
||||
|
||||
// Draws two random 64-bit integers from a per-thread generator (seeded once
// from std::random_device) and combines them into a stream id.
static stream_id make_random_stream_id() {
    static thread_local std::mt19937_64 rand_gen(std::random_device{}());
    static thread_local std::uniform_int_distribution<int64_t> rand_dist(std::numeric_limits<int64_t>::min());

    const int64_t first = rand_dist(rand_gen);
    const int64_t second = rand_dist(rand_gen);
    return {first, second};
}
|
||||
|
||||
/* Given:
|
||||
* 1. a set of tokens which split the token ring into token ranges (vnodes),
|
||||
* 2. information on how each token range is distributed among its owning node's shards
|
||||
* this function tries to generate a set of CDC stream identifiers such that for each
|
||||
* shard and vnode pair there exists a stream whose token falls into this
|
||||
* vnode and is owned by this shard.
|
||||
*
|
||||
* It then builds a cdc::topology_description which maps tokens to these
|
||||
* found stream identifiers, such that if token T is owned by shard S in vnode V,
|
||||
* it gets mapped to the stream identifier generated for (S, V).
|
||||
*/
|
||||
// Run in seastar::async context.
|
||||
// Builds a topology_description covering the ring made of the tokens already
// in `token_metadata` plus `bootstrap_tokens`.
//
// Ranges ending at a bootstrap token get smp::count streams and the ignore-msb
// value from `cfg`; the others use the shard count and ignore-msb value that
// the range's endpoint publishes through `gossiper` (at least one stream).
// Stream ids are found by random sampling under a time quota; slots still
// empty afterwards are filled with neighbouring stream ids.
//
// Throws std::runtime_error when `bootstrap_tokens` is empty or a token has no
// endpoint. Blocks on a future with .get(), hence the seastar::async requirement.
topology_description generate_topology_description(
        const db::config& cfg,
        const std::unordered_set<dht::token>& bootstrap_tokens,
        const locator::token_metadata& token_metadata,
        const gms::gossiper& gossiper) {
    if (bootstrap_tokens.empty()) {
        throw std::runtime_error(
                "cdc: bootstrap tokens is empty in generate_topology_description");
    }

    // Sorted, de-duplicated union of the existing and the bootstrapping tokens.
    auto tokens = token_metadata.sorted_tokens();
    tokens.insert(tokens.end(), bootstrap_tokens.begin(), bootstrap_tokens.end());
    std::sort(tokens.begin(), tokens.end());
    tokens.erase(std::unique(tokens.begin(), tokens.end()), tokens.end());

    std::vector<token_range_description> entries(tokens.size());
    int spots_to_fill = 0;

    for (size_t i = 0; i < tokens.size(); ++i) {
        auto& entry = entries[i];
        entry.token_range_end = tokens[i];

        if (bootstrap_tokens.count(entry.token_range_end) > 0) {
            entry.streams.resize(smp::count);
            entry.sharding_ignore_msb = cfg.murmur3_partitioner_ignore_msb_bits();
        } else {
            auto endpoint = token_metadata.get_endpoint(entry.token_range_end);
            if (!endpoint) {
                throw std::runtime_error(format("Can't find endpoint for token {}", entry.token_range_end));
            }
            auto sc = get_shard_count(*endpoint, gossiper);
            entry.streams.resize(sc > 0 ? sc : 1);
            entry.sharding_ignore_msb = get_sharding_ignore_msb(*endpoint, gossiper);
        }

        spots_to_fill += entry.streams.size();
    }

    // Throwaway schema, used only to compute the token of a candidate stream id.
    auto schema = schema_builder("fake_ks", "fake_table")
        .with_column("stream_id", bytes_type, column_kind::partition_key)
        .build();

    auto quota = std::chrono::seconds(spots_to_fill / 2000 + 1);
    // Elapsed time is measured on a monotonic clock so that wall-clock
    // adjustments can neither cut the search short nor prolong it.
    auto start_time = std::chrono::steady_clock::now();

    // For each pair (i, j), 0 <= i < streams.size(), 0 <= j < streams[i].size(),
    // try to find a stream (stream[i][j]) such that the token of this stream will get mapped to this stream
    // (refer to the comments above topology_description's definition to understand how it describes the mapping).
    // We find the streams by randomly generating them and checking into which pairs they get mapped.
    // NOTE: this algorithm is temporary and will be replaced after per-table-partitioner feature gets merged in.
    repeat([&] {
        for (int i = 0; i < 500; ++i) {
            auto stream_id = make_random_stream_id();
            auto token = dht::get_token(*schema, stream_id.to_partition_key(*schema));

            // Find the token range into which our stream_id's token landed.
            // A token above the greatest range end wraps around to the first range.
            auto it = std::lower_bound(tokens.begin(), tokens.end(), token);
            auto& entry = entries[it != tokens.end() ? std::distance(tokens.begin(), it) : 0];

            auto shard_id = dht::shard_of(entry.streams.size(), entry.sharding_ignore_msb, token);
            assert(shard_id < entry.streams.size());

            if (!entry.streams[shard_id].is_set()) {
                --spots_to_fill;
                entry.streams[shard_id] = stream_id;
            }
        }

        if (!spots_to_fill) {
            return stop_iteration::yes;
        }

        auto now = std::chrono::steady_clock::now();
        auto passed = std::chrono::duration_cast<std::chrono::seconds>(now - start_time);
        if (passed > quota) {
            return stop_iteration::yes;
        }

        return stop_iteration::no;
    }).get();

    if (spots_to_fill) {
        // We were not able to generate stream ids for each (token range, shard) pair.

        // For each range that has a stream, for each shard for this range that doesn't have a stream,
        // use the stream id of the next shard for this range.

        // For each range that doesn't have any stream,
        // use streams of the first range to the left which does have a stream.

        cdc_log.warn("Generation of CDC streams failed to create streams for some (vnode, shard) pair."
                     " This can lead to worse performance.");

        // Locate the first range that owns at least one stream.
        stream_id some_stream;
        size_t idx = 0;
        for (; idx < entries.size(); ++idx) {
            for (const auto& s : entries[idx].streams) {
                if (s.is_set()) {
                    some_stream = s;
                    break;
                }
            }
            if (some_stream.is_set()) {
                break;
            }
        }

        assert(idx != entries.size() && some_stream.is_set());

        // Iterate over all ranges in the clockwise direction, starting with the one we found a stream for.
        for (size_t off = 0; off < entries.size(); ++off) {
            auto& ss = entries[(idx + off) % entries.size()].streams;

            int last_set_stream_idx = ss.size() - 1;
            while (last_set_stream_idx > -1 && !ss[last_set_stream_idx].is_set()) {
                --last_set_stream_idx;
            }

            if (last_set_stream_idx == -1) {
                cdc_log.warn(
                        "CDC wasn't able to generate any stream for vnode ({}, {}]. We'll use another vnode's streams"
                        " instead. This might lead to inconsistencies.",
                        tokens[(idx + off + entries.size() - 1) % entries.size()], tokens[(idx + off) % entries.size()]);

                ss[0] = some_stream;
                last_set_stream_idx = 0;
            }

            some_stream = ss[last_set_stream_idx];

            // Replace 'unset' stream ids with indexes below last_set_stream_idx
            for (int s_idx = last_set_stream_idx - 1; s_idx > -1; --s_idx) {
                if (ss[s_idx].is_set()) {
                    some_stream = ss[s_idx];
                } else {
                    ss[s_idx] = some_stream;
                }
            }
            // Replace 'unset' stream ids with indexes above last_set_stream_idx
            for (int s_idx = ss.size() - 1; s_idx > last_set_stream_idx; --s_idx) {
                if (ss[s_idx].is_set()) {
                    some_stream = ss[s_idx];
                } else {
                    ss[s_idx] = some_stream;
                }
            }
        }
    }

    return {std::move(entries)};
}
|
||||
|
||||
/* Checks whether this node (`me`) should be the one proposing the first CDC generation.
 *
 * Returns true iff none of the endpoints currently listed in the gossiper's endpoint
 * states has a host ID greater than the host ID of `me`.
 */
bool should_propose_first_generation(const gms::inet_address& me, const gms::gossiper& g) {
    auto my_host_id = g.get_host_id(me);
    const auto& eps = g.get_endpoint_states();
    // Bind each element with `const auto&`. Spelling the element type as
    // `std::pair<gms::inet_address, gms::endpoint_state>` does not match a map's
    // `std::pair<const Key, Value>` element type, which would force a converting
    // copy of every (address, endpoint_state) pair just to read its key.
    return std::none_of(eps.begin(), eps.end(), [&] (const auto& ep) {
        return my_host_id < g.get_host_id(ep.first);
    });
}
|
||||
|
||||
/* Reads the CDC streams timestamp saved by db::system_keyspace.
 *
 * The returned future resolves to the saved timestamp. If no timestamp is saved,
 * the error is logged and the future fails with std::runtime_error.
 */
future<db_clock::time_point> get_local_streams_timestamp() {
    auto unwrap_saved_timestamp = [] (std::optional<db_clock::time_point> saved_ts) {
        if (saved_ts) {
            return *saved_ts;
        }

        // Nothing was saved: report it both in the log and to the caller.
        auto message = format("get_local_streams_timestamp: tried to retrieve streams timestamp after bootstrapping, but it's not present");
        cdc_log.error("{}", message);
        throw std::runtime_error(message);
    };

    return db::system_keyspace::get_saved_cdc_streams_timestamp().then(std::move(unwrap_saved_timestamp));
}
|
||||
|
||||
// Run inside seastar::async context.
|
||||
db_clock::time_point make_new_cdc_generation(
|
||||
const db::config& cfg,
|
||||
const std::unordered_set<dht::token>& bootstrap_tokens,
|
||||
const locator::token_metadata& tm,
|
||||
const gms::gossiper& g,
|
||||
db::system_distributed_keyspace& sys_dist_ks,
|
||||
std::chrono::milliseconds ring_delay,
|
||||
bool for_testing) {
|
||||
assert(!bootstrap_tokens.empty());
|
||||
|
||||
auto gen = generate_topology_description(cfg, bootstrap_tokens, tm, g);
|
||||
|
||||
// Begin the race.
|
||||
auto ts = db_clock::now() + (
|
||||
for_testing ? std::chrono::milliseconds(0) : (
|
||||
2 * ring_delay + std::chrono::duration_cast<std::chrono::milliseconds>(generation_leeway)));
|
||||
sys_dist_ks.insert_cdc_topology_description(ts, std::move(gen), { tm.count_normal_token_owners() }).get();
|
||||
|
||||
return ts;
|
||||
}
|
||||
|
||||
/* Looks up the CDC_STREAMS_TIMESTAMP application state gossiped for `endpoint`.
 *
 * Returns an empty optional when the state value is empty; otherwise parses the value
 * with std::stoll and interprets it as a number of db_clock ticks since the epoch.
 */
std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_address& endpoint, const gms::gossiper& g) {
    auto gossiped_value = g.get_application_state_value(endpoint, gms::application_state::CDC_STREAMS_TIMESTAMP);
    cdc_log.trace("endpoint={}, streams_ts_string={}", endpoint, gossiped_value);

    if (!gossiped_value.empty()) {
        const auto ticks = std::stoll(gossiped_value);
        return db_clock::time_point(db_clock::duration(ticks));
    }

    return std::nullopt;
}
|
||||
|
||||
// Run inside seastar::async context.
|
||||
static void do_update_streams_description(
|
||||
db_clock::time_point streams_ts,
|
||||
db::system_distributed_keyspace& sys_dist_ks,
|
||||
db::system_distributed_keyspace::context ctx) {
|
||||
if (sys_dist_ks.cdc_desc_exists(streams_ts, ctx).get0()) {
|
||||
cdc_log.debug("update_streams_description: description of generation {} already inserted", streams_ts);
|
||||
return;
|
||||
}
|
||||
|
||||
// We might race with another node also inserting the description, but that's ok. It's an idempotent operation.
|
||||
|
||||
auto topo = sys_dist_ks.read_cdc_topology_description(streams_ts, ctx).get0();
|
||||
if (!topo) {
|
||||
throw std::runtime_error(format("could not find streams data for timestamp {}", streams_ts));
|
||||
}
|
||||
|
||||
std::set<cdc::stream_id> streams_set;
|
||||
for (auto& entry: topo->entries()) {
|
||||
streams_set.insert(entry.streams.begin(), entry.streams.end());
|
||||
}
|
||||
|
||||
std::vector<cdc::stream_id> streams_vec(streams_set.begin(), streams_set.end());
|
||||
|
||||
sys_dist_ks.create_cdc_desc(streams_ts, streams_vec, ctx).get();
|
||||
cdc_log.info("CDC description table successfully updated with generation {}.", streams_ts);
|
||||
}
|
||||
|
||||
/* Tries to update the CDC description table with the generation identified by `streams_ts`.
 *
 * If the first attempt throws, a warning is logged and a background seastar thread is
 * started which retries every 60 seconds until an attempt succeeds.
 *
 * NOTE(review): the background `sleep_abortable(...).get()` is outside the inner try block,
 * so a failure of the sleep (presumably what happens when `abort_src` fires) ends the retry
 * loop through the discarded future -- confirm that this is the intended shutdown path.
 */
void update_streams_description(
        db_clock::time_point streams_ts,
        shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
        noncopyable_function<unsigned()> get_num_token_owners,
        abort_source& abort_src) {
    try {
        do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
    } catch (...) {
        cdc_log.warn(
            "Could not update CDC description table with generation {}: {}. Will retry in the background.",
            streams_ts, std::current_exception());

        // The retry task owns a copy of the keyspace pointer and takes over the token-owner callback.
        auto retry_until_success = [
            streams_ts, sys_dist_ks, get_num_token_owners = std::move(get_num_token_owners), &abort_src
        ] {
            for (;;) {
                sleep_abortable(std::chrono::seconds(60), abort_src).get();
                try {
                    do_update_streams_description(streams_ts, *sys_dist_ks, { get_num_token_owners() });
                    return;
                } catch (...) {
                    cdc_log.warn(
                        "Could not update CDC description table with generation {}: {}. Will try again.",
                        streams_ts, std::current_exception());
                }
            }
        };

        // It is safe to discard this future: we keep system distributed keyspace alive.
        (void)seastar::async(std::move(retry_until_success));
    }
}
|
||||
|
||||
} // namespace cdc
|
||||
@@ -1,176 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains classes and functions used to manage CDC generations:
|
||||
* sets of CDC stream identifiers used by the cluster to choose partition keys for CDC log writes.
|
||||
* Each CDC generation begins operating at a specific time point, called the generation's timestamp
|
||||
* (`cdc_streams_timestamp` or `streams_timestamp` in the code).
|
||||
* The generation is used by all nodes in the cluster to pick CDC streams until superseded by a new generation.
|
||||
*
|
||||
* Functions from this module are used by the node joining procedure to introduce new CDC generations to the cluster
|
||||
* (which is necessary due to new tokens being inserted into the token ring), or during rolling upgrade
|
||||
* if CDC is enabled for the first time.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include <seastar/util/noncopyable_function.hh>
|
||||
|
||||
#include "database_fwd.hh"
|
||||
#include "db_clock.hh"
|
||||
#include "dht/token.hh"
|
||||
|
||||
namespace seastar {
|
||||
class abort_source;
|
||||
} // namespace seastar
|
||||
|
||||
namespace db {
|
||||
class config;
|
||||
class system_distributed_keyspace;
|
||||
} // namespace db
|
||||
|
||||
namespace gms {
|
||||
class inet_address;
|
||||
class gossiper;
|
||||
} // namespace gms
|
||||
|
||||
namespace locator {
|
||||
class token_metadata;
|
||||
} // namespace locator
|
||||
|
||||
namespace cdc {
|
||||
|
||||
// Identifier of a single CDC stream, stored as an opaque `bytes` value.
// Supports equality and ordering comparisons, so it can be kept in ordered containers.
class stream_id final {
|
||||
// Raw representation of the identifier.
bytes _value;
|
||||
public:
|
||||
// NOTE(review): a default-constructed id is presumably the "unset" state
// reported by is_set() -- confirm against the definition.
stream_id() = default;
|
||||
// Builds an id from two 64-bit integers (presumably the values later returned
// by first() and second() -- confirm against the definition).
stream_id(int64_t, int64_t);
|
||||
// Builds an id from its raw representation. Not `explicit`, so `bytes` converts implicitly.
stream_id(bytes);
|
||||
// Tells whether this id holds a value.
bool is_set() const;
|
||||
bool operator==(const stream_id&) const;
|
||||
bool operator<(const stream_id&) const;
|
||||
|
||||
// Accessors for the two 64-bit components of the id.
int64_t first() const;
|
||||
int64_t second() const;
|
||||
|
||||
// Read-only access to the raw representation.
const bytes& to_bytes() const;
|
||||
|
||||
// Converts the id into a partition key for the given CDC log table schema.
partition_key to_partition_key(const schema& log_schema) const;
|
||||
};
|
||||
|
||||
/* Describes a mapping of tokens to CDC streams in a token range.
|
||||
*
|
||||
* The range ends with `token_range_end`. A vector of `token_range_description`s defines the ranges entirely
|
||||
* (the end of the `i`th range is the beginning of the `(i+1) % size()`th range). Ranges are left-open, right-closed.
|
||||
*
|
||||
* Tokens in the range ending with `token_range_end` are mapped to streams in the `streams` vector as follows:
|
||||
* token `T` is mapped to `streams[j]` if and only if the used partitioner maps `T` to the `j`th shard,
|
||||
* assuming that the partitioner is configured for `streams.size()` shards and (partitioner's) `sharding_ignore_msb`
|
||||
* equals to the given `sharding_ignore_msb`.
|
||||
*/
|
||||
// One token range of the ring together with the CDC streams used for tokens in that range.
struct token_range_description {
|
||||
// Inclusive end of the range.
dht::token token_range_end;
|
||||
// One stream per shard: a token mapped to shard `j` uses `streams[j]`.
std::vector<stream_id> streams;
|
||||
// Value of the partitioner's `sharding_ignore_msb` to use when mapping tokens to shards.
uint8_t sharding_ignore_msb;
|
||||
|
||||
bool operator==(const token_range_description&) const;
|
||||
};
|
||||
|
||||
|
||||
/* Describes a mapping of tokens to CDC streams in a whole token ring.
|
||||
*
|
||||
* Division of the ring to token ranges is defined in terms of `token_range_end`s
|
||||
* in the `_entries` vector. See the comment above `token_range_description` for explanation.
|
||||
*/
|
||||
// Wrapper around a vector of `token_range_description`s describing the whole ring.
class topology_description {
|
||||
// Descriptions of the token ranges the ring is divided into.
std::vector<token_range_description> _entries;
|
||||
public:
|
||||
// Takes the range descriptions by value.
topology_description(std::vector<token_range_description> entries);
|
||||
bool operator==(const topology_description&) const;
|
||||
|
||||
// Read-only access to the range descriptions.
const std::vector<token_range_description>& entries() const;
|
||||
};
|
||||
|
||||
/* Should be called when we're restarting and we noticed that we didn't save any streams timestamp in our local tables,
|
||||
* which means that we're probably upgrading from a non-CDC/old CDC version (another reason could be
|
||||
* that there's a bug, or the user messed with our local tables).
|
||||
*
|
||||
* It checks whether we should be the node to propose the first generation of CDC streams.
|
||||
* The chosen condition is arbitrary, it only tries to make sure that no two nodes propose a generation of streams
|
||||
* when upgrading, and nothing bad happens if they for some reason do (it's mostly an optimization).
|
||||
*/
|
||||
bool should_propose_first_generation(const gms::inet_address& me, const gms::gossiper&);
|
||||
|
||||
/*
|
||||
* Read this node's streams generation timestamp stored in the LOCAL table.
|
||||
* Assumes that the node has successfully bootstrapped, and we're not upgrading from a non-CDC version,
|
||||
* so the timestamp is present.
|
||||
*/
|
||||
future<db_clock::time_point> get_local_streams_timestamp();
|
||||
|
||||
/* Generate a new set of CDC streams and insert it into the distributed cdc_topology_description table.
|
||||
* Returns the timestamp of this new generation.
|
||||
*
|
||||
* Should be called when starting the node for the first time (i.e., joining the ring).
|
||||
*
|
||||
* Assumes that the system_distributed keyspace is initialized.
|
||||
*
|
||||
* The caller of this function is expected to insert this timestamp into the gossiper as fast as possible,
|
||||
* so that other nodes learn about the generation before their clocks cross the timestamp
|
||||
* (not guaranteed in the current implementation, but expected to be the common case;
|
||||
* we assume that `ring_delay` is enough for other nodes to learn about the new generation).
|
||||
*/
|
||||
db_clock::time_point make_new_cdc_generation(
|
||||
const db::config& cfg,
|
||||
const std::unordered_set<dht::token>& bootstrap_tokens,
|
||||
const locator::token_metadata& tm,
|
||||
const gms::gossiper& g,
|
||||
db::system_distributed_keyspace& sys_dist_ks,
|
||||
std::chrono::milliseconds ring_delay,
|
||||
bool for_testing);
|
||||
|
||||
/* Retrieves CDC streams generation timestamp from the given endpoint's application state (broadcasted through gossip).
|
||||
* We might be during a rolling upgrade, so the timestamp might not be there (if the other node didn't upgrade yet),
|
||||
* but if the cluster already supports CDC, then every newly joining node will propose a new CDC generation,
|
||||
* which means it will gossip the generation's timestamp.
|
||||
*/
|
||||
std::optional<db_clock::time_point> get_streams_timestamp_for(const gms::inet_address& endpoint, const gms::gossiper&);
|
||||
|
||||
/* Inform CDC users about a generation of streams (identified by the given timestamp)
|
||||
* by inserting it into the cdc_description table.
|
||||
*
|
||||
* Assumes that the cdc_topology_description table contains this generation.
|
||||
*
|
||||
* Returning from this function does not mean that the table update was successful: the function
|
||||
* might run an asynchronous task in the background.
|
||||
*
|
||||
* Run inside seastar::async context.
|
||||
*/
|
||||
void update_streams_description(
|
||||
db_clock::time_point,
|
||||
shared_ptr<db::system_distributed_keyspace>,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source&);
|
||||
|
||||
} // namespace cdc
|
||||
1394
cdc/log.cc
1394
cdc/log.cc
File diff suppressed because it is too large
Load Diff
145
cdc/log.hh
145
cdc/log.hh
@@ -1,145 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This module manages CDC log tables. It contains facilities used to:
|
||||
* - perform schema changes to CDC log tables correspondingly when base tables are changed,
|
||||
* - perform writes to CDC log tables correspondingly when writes to base tables are made.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/lowres_clock.hh>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "timestamp.hh"
|
||||
#include "tracing/trace_state.hh"
|
||||
#include "cdc_options.hh"
|
||||
#include "utils/UUID.hh"
|
||||
|
||||
class schema;
|
||||
using schema_ptr = seastar::lw_shared_ptr<const schema>;
|
||||
|
||||
namespace locator {
|
||||
|
||||
class token_metadata;
|
||||
|
||||
} // namespace locator
|
||||
|
||||
namespace service {
|
||||
|
||||
class migration_notifier;
|
||||
class storage_proxy;
|
||||
class query_state;
|
||||
|
||||
} // namespace service
|
||||
|
||||
class mutation;
|
||||
class partition_key;
|
||||
|
||||
namespace cdc {
|
||||
|
||||
struct operation_result_tracker;
|
||||
class db_context;
|
||||
class metadata;
|
||||
|
||||
/// \brief CDC service, responsible for schema listeners
|
||||
///
|
||||
/// CDC service will listen for schema changes and iff CDC is enabled/changed
|
||||
/// create/modify/delete corresponding log tables etc as part of the schema change.
|
||||
///
|
||||
// Public facade of the CDC service; the implementation is hidden behind a pimpl (`impl`).
class cdc_service {
|
||||
class impl;
|
||||
// Owning pointer to the hidden implementation.
std::unique_ptr<impl> _impl;
|
||||
public:
|
||||
// Asynchronously stops the service.
future<> stop();
|
||||
// The service can be built either from a storage proxy alone or from a full db_context.
cdc_service(service::storage_proxy&);
|
||||
cdc_service(db_context);
|
||||
~cdc_service();
|
||||
|
||||
// If any of the mutations are cdc enabled, optionally selects preimage, and adds the
|
||||
// appropriate augments to set the log entries.
|
||||
// Iff post-image is enabled for any of these, a non-empty callback is also
|
||||
// returned to be invoked post the mutation query.
|
||||
// NOTE(review): the declared result is a tuple of the mutations and an
// `operation_result_tracker` pointer; the "callback" wording above presumably
// refers to that tracker -- confirm.
future<std::tuple<std::vector<mutation>, lw_shared_ptr<operation_result_tracker>>> augment_mutation_call(
|
||||
lowres_clock::time_point timeout,
|
||||
std::vector<mutation>&& mutations,
|
||||
tracing::trace_state_ptr tr_state
|
||||
);
|
||||
// Tells whether the given mutations need to go through augment_mutation_call().
bool needs_cdc_augmentation(const std::vector<mutation>&) const;
|
||||
};
|
||||
|
||||
// Bundle of references to the objects the CDC service works with.
// It stores references only, so the referenced objects must outlive the context.
struct db_context final {
|
||||
service::storage_proxy& _proxy;
|
||||
service::migration_notifier& _migration_notifier;
|
||||
locator::token_metadata& _token_metadata;
|
||||
cdc::metadata& _cdc_metadata;
|
||||
|
||||
// Step-by-step constructor of a db_context: only the storage proxy is mandatory,
// the remaining references can be supplied through the with_*() setters.
class builder final {
|
||||
service::storage_proxy& _proxy;
|
||||
// Optional overrides; empty until the matching with_*() setter is called.
// NOTE(review): what build() uses for an override that was never set is not
// visible in this header -- check the definition.
std::optional<std::reference_wrapper<service::migration_notifier>> _migration_notifier;
|
||||
std::optional<std::reference_wrapper<locator::token_metadata>> _token_metadata;
|
||||
std::optional<std::reference_wrapper<cdc::metadata>> _cdc_metadata;
|
||||
public:
|
||||
builder(service::storage_proxy& proxy);
|
||||
|
||||
// Each setter returns the builder itself so that calls can be chained.
builder& with_migration_notifier(service::migration_notifier& migration_notifier);
|
||||
builder& with_token_metadata(locator::token_metadata& token_metadata);
|
||||
builder& with_cdc_metadata(cdc::metadata&);
|
||||
|
||||
// Produces the db_context.
db_context build();
|
||||
};
|
||||
};
|
||||
|
||||
// Kind of operation recorded in a CDC log table.
//
// Note: these values will eventually be read by a third party, probably not privy to this
// enum declaration, so don't change the constant values (or the underlying type).
enum class operation : int8_t {
    pre_image = 0,
    update = 1,
    insert = 2,
    row_delete = 3,
    partition_delete = 4,
    range_delete_start_inclusive = 5,
    range_delete_start_exclusive = 6,
    range_delete_end_inclusive = 7,
    range_delete_end_exclusive = 8,
    post_image = 9,
};
|
||||
|
||||
bool is_log_for_some_table(const sstring& ks_name, const std::string_view& table_name);
|
||||
seastar::sstring log_name(const seastar::sstring& table_name);
|
||||
seastar::sstring log_data_column_name(std::string_view column_name);
|
||||
seastar::sstring log_meta_column_name(std::string_view column_name);
|
||||
bytes log_data_column_name_bytes(const bytes& column_name);
|
||||
bytes log_meta_column_name_bytes(const bytes& column_name);
|
||||
|
||||
seastar::sstring log_data_column_deleted_name(std::string_view column_name);
|
||||
bytes log_data_column_deleted_name_bytes(const bytes& column_name);
|
||||
|
||||
seastar::sstring log_data_column_deleted_elements_name(std::string_view column_name);
|
||||
bytes log_data_column_deleted_elements_name_bytes(const bytes& column_name);
|
||||
|
||||
utils::UUID generate_timeuuid(api::timestamp_type t);
|
||||
|
||||
} // namespace cdc
|
||||
200
cdc/metadata.cc
200
cdc/metadata.cc
@@ -1,200 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "dht/token-sharding.hh"
|
||||
#include "utils/exceptions.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
|
||||
#include "cdc/generation.hh"
|
||||
#include "cdc/metadata.hh"
|
||||
|
||||
extern logging::logger cdc_log;
|
||||
|
||||
namespace cdc {
|
||||
extern const api::timestamp_clock::duration generation_leeway;
|
||||
} // namespace cdc
|
||||
|
||||
// Converts a db_clock time point into the tick count of api::timestamp_clock.
static api::timestamp_type to_ts(db_clock::time_point tp) {
    // This assumes that timestamp_clock and db_clock have the same epochs.
    const auto since_epoch = tp.time_since_epoch();
    const auto in_timestamp_ticks = std::chrono::duration_cast<api::timestamp_clock::duration>(since_epoch);
    return in_timestamp_ticks.count();
}
|
||||
|
||||
// Picks, within a single token range, the stream assigned to the shard that owns `tok`.
static cdc::stream_id get_stream(
        const cdc::token_range_description& entry,
        dht::token tok) {
    // The ith stream is the stream for the ith shard.
    const auto& streams = entry.streams;
    const auto num_shards = streams.size();
    const auto shard = dht::shard_of(num_shards, entry.sharding_ignore_msb, tok);

    if (!(shard < num_shards)) {
        on_internal_error(cdc_log, "get_stream: shard_id out of bounds");
    }

    return streams[shard];
}
|
||||
|
||||
// Finds the token range containing `tok` and returns the stream assigned to it there.
static cdc::stream_id get_stream(
        const std::vector<cdc::token_range_description>& entries,
        dht::token tok) {
    if (entries.empty()) {
        on_internal_error(cdc_log, "get_stream: entries empty");
    }

    const auto ends_before = [] (const cdc::token_range_description& e, dht::token t) {
        return e.token_range_end < t;
    };
    // First range whose end is not smaller than the token...
    const auto candidate = std::lower_bound(entries.begin(), entries.end(), tok, ends_before);
    // ...or, when the token lies past every range end, wrap around to the first range.
    const auto& owning_range = (candidate != entries.end()) ? *candidate : entries.front();

    return get_stream(owning_range, tok);
}
|
||||
|
||||
// Returns the known generation with the greatest timestamp not exceeding `ts`,
// or `_gens.end()` when all known generations have higher timestamps than `ts`.
cdc::metadata::container_t::const_iterator cdc::metadata::gen_used_at(api::timestamp_type ts) const {
    const auto first_later = _gens.upper_bound(ts);
    return first_later == _gens.begin() ? _gens.end() : std::prev(first_later);
}
|
||||
|
||||
// Returns the stream to use for a write with timestamp `ts` to the partition with token `tok`.
//
// Rejects (by throwing) timestamps further than `generation_leeway` into the future and
// timestamps older than the generation currently in use. Also throws when no generation is
// in use yet or when the selected generation's data has not been retrieved.
// Side effects: generations older than the current one are erased, and on success `ts`
// is remembered in `_last_stream_timestamp`.
cdc::stream_id cdc::metadata::get_stream(api::timestamp_type ts, dht::token tok) {
    const auto now = api::new_timestamp();
    const auto latest_accepted_ts = now + generation_leeway.count();
    if (ts > latest_accepted_ts) {
        // Note that we might still send a write to a wrong generation, if we learn about the current
        // generation too late (we might think that an earlier generation is the current one).
        // Nothing protects us from that until we start using transactions for generation switching.
        throw exceptions::invalid_request_exception(format(
                "cdc: attempted to get a stream \"from the future\" ({}; current server time: {})."
                " With CDC you cannot send writes with timestamps arbitrarily into the future, because we don't"
                " know what streams will be used at that time.\n"
                "We *do* allow sending writes into the near future, but our ability to do that is limited."
                " If you really must use your own timestamps, then make sure your clocks are well-synchronized"
                " with the database's clocks.", format_timestamp(ts), format_timestamp(now)));
    }

    container_t::const_iterator current = gen_used_at(now);
    if (current == _gens.end()) {
        throw std::runtime_error(format(
                "cdc::metadata::get_stream: could not find any CDC stream (current time: {})."
                " Are we in the middle of a cluster upgrade?", format_timestamp(now)));
    }

    // Garbage-collect generations that will no longer be used.
    current = _gens.erase(_gens.begin(), current);

    if (current->first > ts) {
        throw exceptions::invalid_request_exception(format(
                "cdc: attempted to get a stream from an earlier generation than the currently used one."
                " With CDC you cannot send writes with timestamps too far into the past, because that would break"
                " consistency properties (write timestamp: {}, current generation started at: {})",
                format_timestamp(ts), format_timestamp(current->first)));
    }

    // With `generation_leeway` we allow sending writes to the near future. It might happen
    // that `ts` doesn't belong to the current generation ("current" according to our clock),
    // but to the next generation. Adjust for this case:
    for (auto next = std::next(current); next != _gens.end() && next->first <= ts; ++next) {
        current = next;
    }
    // Note: if there is a next generation that `ts` belongs to, but we don't know about it,
    // then too bad. This is no different from the situation in which we didn't manage to learn
    // about the current generation in time. We won't be able to prevent it until we introduce transactions.

    if (!current->second) {
        throw std::runtime_error(format(
                "cdc: attempted to get a stream from a generation that we know about, but weren't able to retrieve"
                " (generation timestamp: {}, write timestamp: {}). Make sure that the replicas which contain"
                " this generation's data are alive and reachable from this node.", format_timestamp(current->first), format_timestamp(ts)));
    }

    auto stream = ::get_stream(current->second->entries(), tok);
    _last_stream_timestamp = ts;
    return stream;
}
|
||||
|
||||
// Tells whether the generation with timestamp `tp` is either already inserted (with its data)
// or superseded by a later generation whose timestamp has already been reached.
bool cdc::metadata::known_or_obsolete(db_clock::time_point tp) const {
    const auto ts = to_ts(tp);
    auto candidate = _gens.lower_bound(ts);

    if (candidate == _gens.end()) {
        // No known generations with timestamp >= ts.
        return false;
    }

    const bool is_this_generation = candidate->first == ts;
    if (is_this_generation && candidate->second) {
        // We already inserted this particular generation.
        return true;
    }
    if (is_this_generation) {
        // Only prepared so far: look at the generation following it instead.
        ++candidate;
    }

    // Check if some new generation has already superseded this one.
    if (candidate == _gens.end()) {
        return false;
    }
    return candidate->first <= api::new_timestamp();
}
|
||||
|
||||
// Stores the description `gen` of the generation with timestamp `tp`, unless that generation is
// already known or obsolete. Returns true iff the generation was stored.
bool cdc::metadata::insert(db_clock::time_point tp, topology_description&& gen) {
    if (known_or_obsolete(tp)) {
        return false;
    }

    const auto in_use = gen_used_at(api::new_timestamp());
    if (in_use != _gens.end()) {
        // Garbage-collect generations that will no longer be used.
        _gens.erase(_gens.begin(), in_use);
    }

    _gens.insert_or_assign(to_ts(tp), std::move(gen));
    return true;
}
|
||||
|
||||
// Registers the generation with timestamp `tp` as known-but-not-yet-retrieved (an empty optional).
// Returns true iff a new entry was created. Logs an error when the new entry turns out to be
// the generation that the last successful `get_stream` call should have used.
bool cdc::metadata::prepare(db_clock::time_point tp) {
    if (known_or_obsolete(tp)) {
        return false;
    }

    const auto ts = to_ts(tp);
    const bool newly_prepared = _gens.emplace(ts, std::nullopt).second;

    if (!newly_prepared || _last_stream_timestamp == api::missing_timestamp) {
        return newly_prepared;
    }

    const auto used_last_time = gen_used_at(_last_stream_timestamp);
    if (used_last_time != _gens.end() && used_last_time->first == ts) {
        cdc_log.error(
            "just learned about a CDC generation newer than the one used the last time"
            " streams were retrieved. This generation, or some newer one, should have"
            " been used instead (new generation's timestamp: {}, last time streams were retrieved: {})."
            " The new generation probably arrived too late due to a network partition"
            " and we've made a write using the wrong set streams.",
            format_timestamp(ts), format_timestamp(_last_stream_timestamp));
    }

    return newly_prepared;
}
|
||||
@@ -1,92 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "db_clock.hh"
|
||||
#include "timestamp.hh"
|
||||
|
||||
namespace dht {
|
||||
class token;
|
||||
}
|
||||
|
||||
namespace cdc {
|
||||
|
||||
class stream_id;
|
||||
class topology_description;
|
||||
|
||||
/* Represents the node's knowledge about CDC generations used in the cluster.
|
||||
* Used during writes to pick streams to which CDC log writes should be sent to
|
||||
* (i.e., to pick partition keys for these writes).
|
||||
*/
|
||||
class metadata final {
|
||||
// Note: we use db_clock (1ms resolution) for generation timestamps
|
||||
// (because we need to insert them into tables using columns of timestamp types,
|
||||
// and the native type of our columns' timestamp_type is db_clock::time_point).
|
||||
// On the other hand, timestamp_clock (1us resolution) is used for mutation timestamps,
|
||||
// and api::timestamp_type represents the number of ticks of a timestamp_clock::time_point since epoch.
|
||||
|
||||
// Known generations keyed by their timestamp. An empty optional marks a generation
// that was prepared (see `prepare`) but whose description has not been inserted yet.
using container_t = std::map<api::timestamp_type, std::optional<topology_description>>;
|
||||
container_t _gens;
|
||||
|
||||
/* The timestamp used in the last successful `get_stream` call. */
|
||||
api::timestamp_type _last_stream_timestamp = api::missing_timestamp;
|
||||
|
||||
/* Return the known generation with the greatest timestamp less or equal to `ts`,
 * or `_gens.end()` if every known generation has a higher timestamp. */
container_t::const_iterator gen_used_at(api::timestamp_type ts) const;
|
||||
public:
|
||||
/* Is a generation with the given timestamp already known or superseded by a newer generation? */
|
||||
bool known_or_obsolete(db_clock::time_point) const;
|
||||
|
||||
/* Return the stream for the base partition whose token is `tok` to which a corresponding log write should go
|
||||
* according to the generation used at time `ts` (i.e., the latest generation whose timestamp is less or equal to `ts`).
|
||||
*
|
||||
* If the provided timestamp is too far away "into the future" (where "now" is defined according to our local clock),
|
||||
* we reject the get_stream query. This is because the resulting stream might belong to a generation which we don't
|
||||
* yet know about. The amount of leeway (how much "into the future" we allow `ts` to be) is defined
|
||||
* by the `cdc::generation_leeway` constant.
|
||||
*/
|
||||
stream_id get_stream(api::timestamp_type ts, dht::token tok);
|
||||
|
||||
/* Insert the generation given by `gen` with timestamp `ts` to be used by the `get_stream` function,
|
||||
* if the generation is not already known or older than the currently known ones.
|
||||
*
|
||||
* Returns true if the generation was inserted,
|
||||
* meaning that `get_stream` might return a stream from this generation (at some time points).
|
||||
*/
|
||||
bool insert(db_clock::time_point ts, topology_description&& gen);
|
||||
|
||||
/* Prepare for inserting a new generation whose timestamp is `ts`.
|
||||
* This method is not required to be called before `insert`, but it's here
|
||||
* to increase safety of `get_stream` calls in some situations. Use it if you:
|
||||
* 1. know that there is a new generation, but
|
||||
* 2. you didn't yet retrieve the generation's topology_description.
|
||||
*
|
||||
* After preparing a generation, if `get_stream` is supposed to return a stream from this generation
|
||||
* but we don't yet have the generation's data, it will reject the query to maintain consistency of streams.
|
||||
*
|
||||
* Returns true iff this generation is not obsolete and wasn't previously prepared nor inserted.
|
||||
*/
|
||||
bool prepare(db_clock::time_point ts);
|
||||
};
|
||||
|
||||
} // namespace cdc
|
||||
463
cdc/split.cc
463
cdc/split.cc
@@ -1,463 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "mutation.hh"
|
||||
#include "schema.hh"
|
||||
|
||||
#include "split.hh"
|
||||
#include "log.hh"
|
||||
|
||||
struct atomic_column_update {
|
||||
column_id id;
|
||||
atomic_cell cell;
|
||||
};
|
||||
|
||||
// see the comment inside `clustered_row_insert` for motivation for separating
|
||||
// nonatomic deletions from nonatomic updates
|
||||
struct nonatomic_column_deletion {
|
||||
column_id id;
|
||||
tombstone t;
|
||||
};
|
||||
|
||||
struct nonatomic_column_update {
|
||||
column_id id;
|
||||
utils::chunked_vector<std::pair<bytes, atomic_cell>> cells;
|
||||
};
|
||||
|
||||
struct static_row_update {
|
||||
gc_clock::duration ttl;
|
||||
std::vector<atomic_column_update> atomic_entries;
|
||||
std::vector<nonatomic_column_deletion> nonatomic_deletions;
|
||||
std::vector<nonatomic_column_update> nonatomic_updates;
|
||||
};
|
||||
|
||||
// The part of a clustered row's changes that `extract_changes` classifies as an
// INSERT: the group whose timestamp and TTL match the row's live marker.
// The member order is relied upon by the aggregate initialization in `extract_changes`.
struct clustered_row_insert {
|
||||
gc_clock::duration ttl;
|
||||
clustering_key key;
|
||||
row_marker marker;
|
||||
std::vector<atomic_column_update> atomic_entries;
|
||||
std::vector<nonatomic_column_deletion> nonatomic_deletions;
|
||||
// INSERTs can't express updates of individual cells inside a non-atomic column
|
||||
// (without deleting the entire field first), so there is no `nonatomic_updates` field.
|
||||
// Overwriting a nonatomic column inside an INSERT will be split into two changes:
|
||||
// one with a nonatomic deletion, and one with a nonatomic update.
|
||||
};
|
||||
|
||||
struct clustered_row_update {
|
||||
gc_clock::duration ttl;
|
||||
clustering_key key;
|
||||
std::vector<atomic_column_update> atomic_entries;
|
||||
std::vector<nonatomic_column_deletion> nonatomic_deletions;
|
||||
std::vector<nonatomic_column_update> nonatomic_updates;
|
||||
};
|
||||
|
||||
struct clustered_row_deletion {
|
||||
clustering_key key;
|
||||
tombstone t;
|
||||
};
|
||||
|
||||
struct clustered_range_deletion {
|
||||
range_tombstone rt;
|
||||
};
|
||||
|
||||
struct partition_deletion {
|
||||
tombstone t;
|
||||
};
|
||||
|
||||
struct batch {
|
||||
std::vector<static_row_update> static_updates;
|
||||
std::vector<clustered_row_insert> clustered_inserts;
|
||||
std::vector<clustered_row_update> clustered_updates;
|
||||
std::vector<clustered_row_deletion> clustered_row_deletions;
|
||||
std::vector<clustered_range_deletion> clustered_range_deletions;
|
||||
std::optional<partition_deletion> partition_deletions;
|
||||
};
|
||||
|
||||
using set_of_changes = std::map<api::timestamp_type, batch>;
|
||||
|
||||
struct row_update {
|
||||
std::vector<atomic_column_update> atomic_entries;
|
||||
std::vector<nonatomic_column_deletion> nonatomic_deletions;
|
||||
std::vector<nonatomic_column_update> nonatomic_updates;
|
||||
};
|
||||
|
||||
static
|
||||
std::map<std::pair<api::timestamp_type, gc_clock::duration>, row_update>
|
||||
extract_row_updates(const row& r, column_kind ckind, const schema& schema) {
|
||||
std::map<std::pair<api::timestamp_type, gc_clock::duration>, row_update> result;
|
||||
r.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
auto& cdef = schema.column_at(ckind, id);
|
||||
if (cdef.is_atomic()) {
|
||||
auto view = cell.as_atomic_cell(cdef);
|
||||
auto timestamp_and_ttl = std::pair(
|
||||
view.timestamp(),
|
||||
view.is_live_and_has_ttl() ? view.ttl() : gc_clock::duration(0)
|
||||
);
|
||||
result[timestamp_and_ttl].atomic_entries.push_back({id, atomic_cell(*cdef.type, view)});
|
||||
return;
|
||||
}
|
||||
|
||||
cell.as_collection_mutation().with_deserialized(*cdef.type, [&] (collection_mutation_view_description mview) {
|
||||
auto desc = mview.materialize(*cdef.type);
|
||||
for (auto& [k, v]: desc.cells) {
|
||||
auto timestamp_and_ttl = std::pair(
|
||||
v.timestamp(),
|
||||
v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0)
|
||||
);
|
||||
auto& updates = result[timestamp_and_ttl].nonatomic_updates;
|
||||
if (updates.empty() || updates.back().id != id) {
|
||||
updates.push_back({id, {}});
|
||||
}
|
||||
updates.back().cells.push_back({std::move(k), std::move(v)});
|
||||
}
|
||||
|
||||
if (desc.tomb) {
|
||||
auto timestamp_and_ttl = std::pair(desc.tomb.timestamp, gc_clock::duration(0));
|
||||
result[timestamp_and_ttl].nonatomic_deletions.push_back({id, desc.tomb});
|
||||
}
|
||||
});
|
||||
});
|
||||
return result;
|
||||
};
|
||||
|
||||
// Decomposes `base_mutation` into elementary changes, bucketed by write timestamp.
//
// Each bucket (`batch`) collects, for one timestamp: static row updates, clustered row
// inserts and updates (one entry per distinct TTL), row deletions, range deletions and
// an optional partition deletion.
set_of_changes extract_changes(const mutation& base_mutation, const schema& base_schema) {
    set_of_changes changes;
    auto& partition = base_mutation.partition();

    // Static row: one static update per (timestamp, ttl) group.
    auto static_groups = extract_row_updates(partition.static_row().get(), column_kind::static_column, base_schema);
    for (auto& [group, update] : static_groups) {
        changes[group.first].static_updates.push_back({
            group.second,
            std::move(update.atomic_entries),
            std::move(update.nonatomic_deletions),
            std::move(update.nonatomic_updates)
        });
    }

    for (const rows_entry& cr : partition.clustered_rows()) {
        auto row_groups = extract_row_updates(cr.row().cells(), column_kind::regular_column, base_schema);

        const auto& marker = cr.row().marker();
        const bool has_live_marker = marker.is_live();
        auto marker_timestamp = marker.timestamp();
        auto marker_ttl = marker.is_expiring() ? marker.ttl() : gc_clock::duration(0);
        if (has_live_marker) {
            // A live marker must yield an insert even if no cell shares its timestamp and TTL,
            // so make sure the corresponding (possibly empty) group exists.
            (void)row_groups[std::pair(marker_timestamp, marker_ttl)];
        }

        for (auto& [group, update] : row_groups) {
            const auto timestamp = group.first;
            const auto ttl = group.second;
            auto& bucket = changes[timestamp];

            // Only the group matching the live marker's timestamp and TTL is an INSERT.
            const bool same_as_marker = has_live_marker && timestamp == marker_timestamp && ttl == marker_ttl;
            if (!same_as_marker) {
                bucket.clustered_updates.push_back({
                    ttl,
                    cr.key(),
                    std::move(update.atomic_entries),
                    std::move(update.nonatomic_deletions),
                    std::move(update.nonatomic_updates)
                });
                continue;
            }

            bucket.clustered_inserts.push_back({
                ttl,
                cr.key(),
                marker,
                std::move(update.atomic_entries),
                std::move(update.nonatomic_deletions)
            });
            if (!update.nonatomic_updates.empty()) {
                // nonatomic updates cannot be expressed with an INSERT.
                bucket.clustered_updates.push_back({
                    ttl,
                    cr.key(),
                    {},
                    {},
                    std::move(update.nonatomic_updates)
                });
            }
        }

        if (auto row_tomb = cr.row().deleted_at().regular(); row_tomb) {
            changes[row_tomb.timestamp].clustered_row_deletions.push_back({cr.key(), row_tomb});
        }
    }

    for (const auto& rt : partition.row_tombstones()) {
        if (rt.tomb.timestamp == api::missing_timestamp) {
            continue;
        }
        changes[rt.tomb.timestamp].clustered_range_deletions.push_back({rt});
    }

    const auto partition_tomb_timestamp = partition.partition_tombstone().timestamp;
    if (partition_tomb_timestamp != api::missing_timestamp) {
        changes[partition_tomb_timestamp].partition_deletions = partition_deletion{partition.partition_tombstone()};
    }

    return changes;
}
|
||||
|
||||
namespace cdc {
|
||||
|
||||
bool should_split(const mutation& base_mutation, const schema& base_schema) {
|
||||
auto& p = base_mutation.partition();
|
||||
|
||||
api::timestamp_type found_ts = api::missing_timestamp;
|
||||
std::optional<gc_clock::duration> found_ttl; // 0 = "no ttl"
|
||||
|
||||
auto check_or_set = [&] (api::timestamp_type ts, gc_clock::duration ttl) {
|
||||
if (found_ts != api::missing_timestamp && found_ts != ts) {
|
||||
return true;
|
||||
}
|
||||
found_ts = ts;
|
||||
|
||||
if (found_ttl && *found_ttl != ttl) {
|
||||
return true;
|
||||
}
|
||||
found_ttl = ttl;
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
bool had_static_row = false;
|
||||
|
||||
bool should_split = false;
|
||||
p.static_row().get().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
had_static_row = true;
|
||||
|
||||
auto& cdef = base_schema.column_at(column_kind::static_column, id);
|
||||
if (cdef.is_atomic()) {
|
||||
auto view = cell.as_atomic_cell(cdef);
|
||||
if (check_or_set(view.timestamp(), view.is_live_and_has_ttl() ? view.ttl() : gc_clock::duration(0))) {
|
||||
should_split = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
cell.as_collection_mutation().with_deserialized(*cdef.type, [&] (collection_mutation_view_description mview) {
|
||||
auto desc = mview.materialize(*cdef.type);
|
||||
for (auto& [k, v]: desc.cells) {
|
||||
if (check_or_set(v.timestamp(), v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0))) {
|
||||
should_split = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (desc.tomb) {
|
||||
if (check_or_set(desc.tomb.timestamp, gc_clock::duration(0))) {
|
||||
should_split = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (should_split) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool had_clustered_row = false;
|
||||
|
||||
if (!p.clustered_rows().empty() && had_static_row) {
|
||||
return true;
|
||||
}
|
||||
for (const rows_entry& cr : p.clustered_rows()) {
|
||||
had_clustered_row = true;
|
||||
|
||||
const auto& marker = cr.row().marker();
|
||||
if (marker.is_live() && check_or_set(marker.timestamp(), marker.is_expiring() ? marker.ttl() : gc_clock::duration(0))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_insert = marker.is_live();
|
||||
|
||||
bool had_cells = false;
|
||||
cr.row().cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
had_cells = true;
|
||||
|
||||
auto& cdef = base_schema.column_at(column_kind::regular_column, id);
|
||||
if (cdef.is_atomic()) {
|
||||
auto view = cell.as_atomic_cell(cdef);
|
||||
if (check_or_set(view.timestamp(), view.is_live_and_has_ttl() ? view.ttl() : gc_clock::duration(0))) {
|
||||
should_split = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
cell.as_collection_mutation().with_deserialized(*cdef.type, [&] (collection_mutation_view_description mview) {
|
||||
for (auto& [k, v]: mview.cells) {
|
||||
if (check_or_set(v.timestamp(), v.is_live_and_has_ttl() ? v.ttl() : gc_clock::duration(0))) {
|
||||
should_split = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_insert) {
|
||||
// nonatomic updates cannot be expressed with an INSERT.
|
||||
should_split = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (mview.tomb) {
|
||||
if (check_or_set(mview.tomb.timestamp, gc_clock::duration(0))) {
|
||||
should_split = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (should_split) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto row_tomb = cr.row().deleted_at().regular();
|
||||
if (row_tomb) {
|
||||
if (had_cells) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// there were no cells, so no ttl
|
||||
assert(!found_ttl);
|
||||
if (found_ts != api::missing_timestamp && found_ts != row_tomb.timestamp) {
|
||||
return true;
|
||||
}
|
||||
|
||||
found_ts = row_tomb.timestamp;
|
||||
}
|
||||
}
|
||||
|
||||
if (!p.row_tombstones().empty() && (had_static_row || had_clustered_row)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (const auto& rt: p.row_tombstones()) {
|
||||
if (rt.tomb) {
|
||||
if (found_ts != api::missing_timestamp && found_ts != rt.tomb.timestamp) {
|
||||
return true;
|
||||
}
|
||||
|
||||
found_ts = rt.tomb.timestamp;
|
||||
}
|
||||
}
|
||||
|
||||
if (p.partition_tombstone().timestamp != api::missing_timestamp
|
||||
&& (!p.row_tombstones().empty() || had_static_row || had_clustered_row)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// A mutation with no timestamp will be split into 0 mutations
|
||||
return found_ts == api::missing_timestamp;
|
||||
}
|
||||
|
||||
void for_each_change(const mutation& base_mutation, const schema_ptr& base_schema,
|
||||
seastar::noncopyable_function<void(mutation, api::timestamp_type, bytes, int&)> f) {
|
||||
auto changes = extract_changes(base_mutation, *base_schema);
|
||||
auto pk = base_mutation.key();
|
||||
|
||||
for (auto& [change_ts, btch] : changes) {
|
||||
auto tuuid = timeuuid_type->decompose(generate_timeuuid(change_ts));
|
||||
int batch_no = 0;
|
||||
|
||||
for (auto& sr_update : btch.static_updates) {
|
||||
mutation m(base_schema, pk);
|
||||
for (auto& atomic_update : sr_update.atomic_entries) {
|
||||
auto& cdef = base_schema->column_at(column_kind::static_column, atomic_update.id);
|
||||
m.set_static_cell(cdef, std::move(atomic_update.cell));
|
||||
}
|
||||
for (auto& nonatomic_delete : sr_update.nonatomic_deletions) {
|
||||
auto& cdef = base_schema->column_at(column_kind::static_column, nonatomic_delete.id);
|
||||
m.set_static_cell(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
|
||||
}
|
||||
for (auto& nonatomic_update : sr_update.nonatomic_updates) {
|
||||
auto& cdef = base_schema->column_at(column_kind::static_column, nonatomic_update.id);
|
||||
m.set_static_cell(cdef, collection_mutation_description{{}, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
|
||||
}
|
||||
f(std::move(m), change_ts, tuuid, batch_no);
|
||||
}
|
||||
|
||||
for (auto& cr_insert : btch.clustered_inserts) {
|
||||
mutation m(base_schema, pk);
|
||||
|
||||
auto& row = m.partition().clustered_row(*base_schema, cr_insert.key);
|
||||
for (auto& atomic_update : cr_insert.atomic_entries) {
|
||||
auto& cdef = base_schema->column_at(column_kind::regular_column, atomic_update.id);
|
||||
row.cells().apply(cdef, std::move(atomic_update.cell));
|
||||
}
|
||||
for (auto& nonatomic_delete : cr_insert.nonatomic_deletions) {
|
||||
auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_delete.id);
|
||||
row.cells().apply(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
|
||||
}
|
||||
row.apply(cr_insert.marker);
|
||||
|
||||
f(std::move(m), change_ts, tuuid, batch_no);
|
||||
}
|
||||
|
||||
for (auto& cr_update : btch.clustered_updates) {
|
||||
mutation m(base_schema, pk);
|
||||
|
||||
auto& row = m.partition().clustered_row(*base_schema, cr_update.key).cells();
|
||||
for (auto& atomic_update : cr_update.atomic_entries) {
|
||||
auto& cdef = base_schema->column_at(column_kind::regular_column, atomic_update.id);
|
||||
row.apply(cdef, std::move(atomic_update.cell));
|
||||
}
|
||||
for (auto& nonatomic_delete : cr_update.nonatomic_deletions) {
|
||||
auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_delete.id);
|
||||
row.apply(cdef, collection_mutation_description{nonatomic_delete.t, {}}.serialize(*cdef.type));
|
||||
}
|
||||
for (auto& nonatomic_update : cr_update.nonatomic_updates) {
|
||||
auto& cdef = base_schema->column_at(column_kind::regular_column, nonatomic_update.id);
|
||||
row.apply(cdef, collection_mutation_description{{}, std::move(nonatomic_update.cells)}.serialize(*cdef.type));
|
||||
}
|
||||
|
||||
f(std::move(m), change_ts, tuuid, batch_no);
|
||||
}
|
||||
|
||||
for (auto& cr_delete : btch.clustered_row_deletions) {
|
||||
mutation m(base_schema, pk);
|
||||
m.partition().apply_delete(*base_schema, cr_delete.key, cr_delete.t);
|
||||
f(std::move(m), change_ts, tuuid, batch_no);
|
||||
}
|
||||
|
||||
for (auto& crange_delete : btch.clustered_range_deletions) {
|
||||
mutation m(base_schema, pk);
|
||||
m.partition().apply_delete(*base_schema, crange_delete.rt);
|
||||
f(std::move(m), change_ts, tuuid, batch_no);
|
||||
}
|
||||
|
||||
if (btch.partition_deletions) {
|
||||
mutation m(base_schema, pk);
|
||||
m.partition().apply(btch.partition_deletions->t);
|
||||
f(std::move(m), change_ts, tuuid, batch_no);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cdc
|
||||
38
cdc/split.hh
38
cdc/split.hh
@@ -1,38 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "schema_fwd.hh"
|
||||
#include "timestamp.hh"
|
||||
#include "bytes.hh"
|
||||
#include <seastar/util/noncopyable_function.hh>
|
||||
|
||||
class mutation;
|
||||
|
||||
namespace cdc {
|
||||
|
||||
// Inspects the static row, clustered rows and tombstones of `base_mutation` and returns
// true if they do not all share a single timestamp and TTL, or if the mutation combines
// kinds of changes that the implementation in cdc/split.cc treats as separate ones.
// Also returns true for a mutation in which no timestamp was found.
bool should_split(const mutation& base_mutation, const schema& base_schema);
|
||||
// Breaks `base_mutation` into smaller mutations grouped by timestamp and invokes the
// callback once per piece, in ascending timestamp order. The callback receives: the piece
// (built against `base_schema`), its timestamp, the serialized timeuuid generated for that
// timestamp, and a reference to an `int` that starts at 0 for every timestamp and is
// shared by all pieces carrying that timestamp.
void for_each_change(const mutation& base_mutation, const schema_ptr& base_schema,
|
||||
seastar::noncopyable_function<void(mutation, api::timestamp_type, bytes, int&)>);
|
||||
|
||||
}
|
||||
120
cdc/stats.hh
120
cdc/stats.hh
@@ -1,120 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <seastar/core/metrics_registration.hh>
|
||||
#include "enum_set.hh"
|
||||
#include "utils/histogram.hh"
|
||||
#include "utils/estimated_histogram.hh"
|
||||
|
||||
namespace cdc {
|
||||
|
||||
class stats final {
|
||||
seastar::metrics::metric_groups _metrics;
|
||||
|
||||
public:
|
||||
enum class part_type {
|
||||
STATIC_ROW,
|
||||
CLUSTERING_ROW,
|
||||
MAP,
|
||||
SET,
|
||||
LIST,
|
||||
UDT,
|
||||
RANGE_TOMBSTONE,
|
||||
PARTITION_DELETE,
|
||||
ROW_DELETE,
|
||||
|
||||
MAX
|
||||
};
|
||||
|
||||
using part_type_set = enum_set<super_enum<part_type,
|
||||
part_type::STATIC_ROW,
|
||||
part_type::CLUSTERING_ROW,
|
||||
part_type::MAP,
|
||||
part_type::SET,
|
||||
part_type::LIST,
|
||||
part_type::UDT,
|
||||
part_type::RANGE_TOMBSTONE,
|
||||
part_type::PARTITION_DELETE,
|
||||
part_type::ROW_DELETE
|
||||
>>;
|
||||
|
||||
struct parts_touched_stats final {
|
||||
std::array<uint64_t, (size_t)part_type::MAX> count = {};
|
||||
|
||||
inline void apply(part_type_set parts_set) {
|
||||
for (part_type idx : parts_set) {
|
||||
count[(size_t)idx]++;
|
||||
}
|
||||
}
|
||||
|
||||
void register_metrics(seastar::metrics::metric_groups& metrics, std::string_view suffix);
|
||||
};
|
||||
|
||||
struct counters final {
|
||||
uint64_t unsplit_count = 0;
|
||||
uint64_t split_count = 0;
|
||||
uint64_t preimage_selects = 0;
|
||||
uint64_t with_preimage_count = 0;
|
||||
uint64_t with_postimage_count = 0;
|
||||
|
||||
parts_touched_stats touches;
|
||||
};
|
||||
|
||||
counters counters_total;
|
||||
counters counters_failed;
|
||||
|
||||
stats();
|
||||
};
|
||||
|
||||
// Contains the details on what happened during a CDC operation.
|
||||
struct operation_details final {
|
||||
stats::part_type_set touched_parts;
|
||||
bool was_split = false;
|
||||
bool had_preimage = false;
|
||||
bool had_postimage = false;
|
||||
};
|
||||
|
||||
// This object tracks the lifetime of write handlers related to one CDC operation. After all
|
||||
// write handlers for the operation finish, CDC metrics are updated.
|
||||
class operation_result_tracker final {
|
||||
stats& _stats;
|
||||
operation_details _details;
|
||||
bool _failed;
|
||||
|
||||
public:
|
||||
operation_result_tracker(stats& stats, operation_details details)
|
||||
: _stats(stats)
|
||||
, _details(details)
|
||||
, _failed(false)
|
||||
{}
|
||||
~operation_result_tracker();
|
||||
|
||||
void on_mutation_failed() {
|
||||
_failed = true;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -22,10 +22,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "seastar/core/file.hh"
|
||||
#include "seastar/core/reactor.hh"
|
||||
#include "utils/disk-error-handler.hh"
|
||||
|
||||
#include "seastarx.hh"
|
||||
#include "disk-error-handler.hh"
|
||||
|
||||
class checked_file_impl : public file_impl {
|
||||
public:
|
||||
|
||||
@@ -19,23 +19,6 @@
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <seastar/core/print.hh>
|
||||
|
||||
#include "db_clock.hh"
|
||||
#include "timestamp.hh"
|
||||
|
||||
#include "clocks-impl.hh"
|
||||
|
||||
std::atomic<int64_t> clocks_offset;
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, db_clock::time_point tp) {
|
||||
auto t = db_clock::to_time_t(tp);
|
||||
::tm t_buf;
|
||||
return os << std::put_time(::gmtime_r(&t, &t_buf), "%Y/%m/%d %T");
|
||||
}
|
||||
|
||||
// Renders `ts`, a tick count of api::timestamp_clock, as a UTC date and time in the
// form "YYYY/MM/DD HH:MM:SS". Anything finer than whole seconds is dropped.
std::string format_timestamp(api::timestamp_type ts) {
    const auto since_epoch = api::timestamp_clock::duration(ts);
    const auto whole_seconds = std::chrono::duration_cast<std::chrono::seconds>(since_epoch);
    auto t = std::time_t(whole_seconds.count());
    ::tm broken_down;
    const ::tm* utc = ::gmtime_r(&t, &broken_down);
    return format("{}", std::put_time(utc, "%Y/%m/%d %T"));
}
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
|
||||
#include <functional>
|
||||
#include "keys.hh"
|
||||
#include "schema_fwd.hh"
|
||||
#include "schema.hh"
|
||||
#include "range.hh"
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "schema_fwd.hh"
|
||||
#include "position_in_partition.hh"
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
|
||||
// Represents a non-contiguous subset of clustering_key domain of a particular schema.
|
||||
// Can be treated like an ordered and non-overlapping sequence of position_range:s.
|
||||
class clustering_interval_set {
|
||||
// Needed to make position_in_partition comparable, required by boost::icl::interval_set.
|
||||
class position_in_partition_with_schema {
|
||||
schema_ptr _schema;
|
||||
position_in_partition _pos;
|
||||
public:
|
||||
position_in_partition_with_schema()
|
||||
: _pos(position_in_partition::for_static_row())
|
||||
{ }
|
||||
position_in_partition_with_schema(schema_ptr s, position_in_partition pos)
|
||||
: _schema(std::move(s))
|
||||
, _pos(std::move(pos))
|
||||
{ }
|
||||
bool operator<(const position_in_partition_with_schema& other) const {
|
||||
return position_in_partition::less_compare(*_schema)(_pos, other._pos);
|
||||
}
|
||||
bool operator==(const position_in_partition_with_schema& other) const {
|
||||
return position_in_partition::equal_compare(*_schema)(_pos, other._pos);
|
||||
}
|
||||
const position_in_partition& position() const { return _pos; }
|
||||
};
|
||||
private:
|
||||
// We want to represent intervals of clustering keys, not position_in_partitions,
|
||||
// but clustering_key domain is not enough to represent all kinds of clustering ranges.
|
||||
// All intervals in this set are of the form [x, y).
|
||||
using set_type = boost::icl::interval_set<position_in_partition_with_schema>;
|
||||
using interval = boost::icl::interval<position_in_partition_with_schema>;
|
||||
set_type _set;
|
||||
public:
|
||||
clustering_interval_set() = default;
|
||||
// Constructs from legacy clustering_row_ranges
|
||||
clustering_interval_set(const schema& s, const query::clustering_row_ranges& ranges) {
|
||||
for (auto&& r : ranges) {
|
||||
add(s, position_range::from_range(r));
|
||||
}
|
||||
}
|
||||
query::clustering_row_ranges to_clustering_row_ranges() const {
|
||||
query::clustering_row_ranges result;
|
||||
for (position_range r : *this) {
|
||||
result.push_back(query::clustering_range::make(
|
||||
{r.start().key(), r.start()._bound_weight != bound_weight::after_all_prefixed},
|
||||
{r.end().key(), r.end()._bound_weight == bound_weight::after_all_prefixed}));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
class position_range_iterator : public std::iterator<std::input_iterator_tag, const position_range> {
|
||||
set_type::iterator _i;
|
||||
public:
|
||||
position_range_iterator(set_type::iterator i) : _i(i) {}
|
||||
position_range operator*() const {
|
||||
// FIXME: Produce position_range view. Not performance critical yet.
|
||||
const interval::interval_type& iv = *_i;
|
||||
return position_range{iv.lower().position(), iv.upper().position()};
|
||||
}
|
||||
bool operator==(const position_range_iterator& other) const { return _i == other._i; }
|
||||
bool operator!=(const position_range_iterator& other) const { return _i != other._i; }
|
||||
position_range_iterator& operator++() {
|
||||
++_i;
|
||||
return *this;
|
||||
}
|
||||
position_range_iterator operator++(int) {
|
||||
auto tmp = *this;
|
||||
++_i;
|
||||
return tmp;
|
||||
}
|
||||
};
|
||||
static interval::type make_interval(const schema& s, const position_range& r) {
|
||||
assert(r.start().has_clustering_key());
|
||||
assert(r.end().has_clustering_key());
|
||||
return interval::right_open(
|
||||
position_in_partition_with_schema(s.shared_from_this(), r.start()),
|
||||
position_in_partition_with_schema(s.shared_from_this(), r.end()));
|
||||
}
|
||||
public:
|
||||
bool equals(const schema& s, const clustering_interval_set& other) const {
|
||||
return boost::equal(_set, other._set);
|
||||
}
|
||||
bool contains(const schema& s, position_in_partition_view pos) const {
|
||||
// FIXME: Avoid copy
|
||||
return _set.find(position_in_partition_with_schema(s.shared_from_this(), position_in_partition(pos))) != _set.end();
|
||||
}
|
||||
// Returns true iff this set is fully contained in the other set.
|
||||
bool contained_in(clustering_interval_set& other) const {
|
||||
return boost::icl::within(_set, other._set);
|
||||
}
|
||||
bool overlaps(const schema& s, const position_range& range) const {
|
||||
// FIXME: Avoid copy
|
||||
auto r = _set.equal_range(make_interval(s, range));
|
||||
return r.first != r.second;
|
||||
}
|
||||
// Adds given clustering range to this interval set.
|
||||
// The range may overlap with this set.
|
||||
void add(const schema& s, const position_range& r) {
|
||||
_set += make_interval(s, r);
|
||||
}
|
||||
void add(const schema& s, const clustering_interval_set& other) {
|
||||
for (auto&& r : other) {
|
||||
add(s, r);
|
||||
}
|
||||
}
|
||||
position_range_iterator begin() const { return {_set.begin()}; }
|
||||
position_range_iterator end() const { return {_set.end()}; }
|
||||
friend std::ostream& operator<<(std::ostream&, const clustering_interval_set&);
|
||||
};
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "schema_fwd.hh"
|
||||
#include "schema.hh"
|
||||
#include "query-request.hh"
|
||||
|
||||
namespace query {
|
||||
|
||||
@@ -21,8 +21,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <json/json.h>
|
||||
|
||||
#include "bytes.hh"
|
||||
|
||||
class schema;
|
||||
|
||||
@@ -21,8 +21,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
// combine two sorted uniqued sequences into a single sorted sequence
|
||||
// unique elements are copied, duplicate elements are merged with a
|
||||
// binary function.
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "schema.hh"
|
||||
#include "collection_mutation.hh"
|
||||
|
||||
class atomic_cell;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user