Merge 'api/storage service: validate table names' from Benny Halevy

This series fixes a couple issues around generating and handling of no_such_keyspace and no_such_column_family exceptions.

First, it removes std::throw_with_nested around their throw sites in the respective database::find_* functions.
Fixes #9753

And then, it introduces a `validate_tables` helper in api/storage_service.cc that generates a `bad_param_exception` in order to set the correct http response status if a non-existing table name is provided in the `cf` http request parameter.
Fixes #9754

The series also adds a test for the REST API under test/rest_api that verifies the storage_service enable/disable auto_compaction api and checks the error codes for non-existing keyspace or table.

Test: unit(dev)

Closes #9755

* github.com:scylladb/scylla:
  api: storage_service: add parse_tables
  database: un-nest no_such_keyspace and no_such_column_family exceptions
  database: throw internal error when failing uuid returned by find_uuid
  database: find_uuid: throw no_such_column_family exception if ks/cf were not found
  test: rest_api: add storage_service test
  test: add basic rest api test
  test: cql-pytest: wait for rest api when starting scylla
This commit is contained in:
Nadav Har'El
2021-12-08 16:54:48 +02:00
13 changed files with 372 additions and 34 deletions

View File

@@ -94,13 +94,6 @@ inline std::vector<sstring> split(const sstring& text, const char* separator) {
return boost::split(tokens, text, boost::is_any_of(separator));
}
/**
* Split a column family parameter
*/
inline std::vector<sstring> split_cf(const sstring& cf) {
return split(cf, ",");
}
/**
* A helper function to sum values on an a distributed object that
* has a get_stats method.

View File

@@ -59,8 +59,8 @@ std::tuple<sstring, sstring> parse_fully_qualified_cf_name(sstring name) {
const utils::UUID& get_uuid(const sstring& ks, const sstring& cf, const database& db) {
try {
return db.find_uuid(ks, cf);
} catch (std::out_of_range& e) {
throw bad_param_exception(format("Column family '{}:{}' not found", ks, cf));
} catch (no_such_column_family& e) {
throw bad_param_exception(e.what());
}
}

View File

@@ -76,6 +76,25 @@ static sstring validate_keyspace(http_context& ctx, const parameters& param) {
throw bad_param_exception("Keyspace " + param["keyspace"] + " Does not exist");
}
// splits a request parameter assumed to hold a comma-separated list of table names
// verify that the tables are found, otherwise a bad_param_exception exception is thrown
// containing the description of the respective no_such_column_family error.
static std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name) {
auto it = query_params.find(param_name);
if (it == query_params.end()) {
return {};
}
std::vector<sstring> names = split(it->second, ",");
try {
for (const auto& table_name : names) {
ctx.db.local().find_column_family(ks_name, table_name);
}
} catch (const no_such_column_family& e) {
throw bad_param_exception(e.what());
}
return names;
}
static ss::token_range token_range_endpoints_to_json(const dht::token_range_endpoints& d) {
ss::token_range r;
r.start_token = d._start_token;
@@ -99,7 +118,7 @@ using ks_cf_func = std::function<future<json::json_return_type>(http_context&, s
static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
return [&ctx, f = std::move(f)](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto column_families = split_cf(req->get_query_param("cf"));
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
if (column_families.empty()) {
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
@@ -582,7 +601,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto column_families = split_cf(req->get_query_param("cf"));
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
if (column_families.empty()) {
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
@@ -606,7 +625,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::force_keyspace_cleanup.set(r, [&ctx, &ss](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto column_families = split_cf(req->get_query_param("cf"));
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
if (column_families.empty()) {
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
@@ -653,7 +672,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::force_keyspace_flush.set(r, [&ctx](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto column_families = split_cf(req->get_query_param("cf"));
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
if (column_families.empty()) {
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
@@ -993,14 +1012,14 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::enable_auto_compaction.set(r, [&ctx, &ss](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto tables = split_cf(req->get_query_param("cf"));
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
return set_tables_autocompaction(ctx, ss.local(), keyspace, tables, true);
});
ss::disable_auto_compaction.set(r, [&ctx, &ss](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto tables = split_cf(req->get_query_param("cf"));
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
return set_tables_autocompaction(ctx, ss.local(), keyspace, tables, false);
});

View File

@@ -953,7 +953,12 @@ future<> database::remove(const column_family& cf) noexcept {
future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_name, timestamp_func tsf, bool snapshot) {
auto& ks = find_keyspace(ks_name);
auto uuid = find_uuid(ks_name, cf_name);
auto cf = _column_families.at(uuid);
lw_shared_ptr<table> cf;
try {
cf = _column_families.at(uuid);
} catch (std::out_of_range&) {
on_internal_error(dblog, fmt::format("drop_column_family {}.{}: UUID={} not found", ks_name, cf_name, uuid));
}
co_await remove(*cf);
cf->clear_views();
co_return co_await cf->await_pending_ops().then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
@@ -966,8 +971,8 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
const utils::UUID& database::find_uuid(std::string_view ks, std::string_view cf) const {
try {
return _ks_cf_to_uuid.at(std::make_pair(ks, cf));
} catch (...) {
throw std::out_of_range("");
} catch (std::out_of_range&) {
throw no_such_column_family(ks, cf);
}
}
@@ -978,16 +983,16 @@ const utils::UUID& database::find_uuid(const schema_ptr& schema) const {
keyspace& database::find_keyspace(std::string_view name) {
try {
return _keyspaces.at(name);
} catch (...) {
std::throw_with_nested(no_such_keyspace(name));
} catch (std::out_of_range&) {
throw no_such_keyspace(name);
}
}
const keyspace& database::find_keyspace(std::string_view name) const {
try {
return _keyspaces.at(name);
} catch (...) {
std::throw_with_nested(no_such_keyspace(name));
} catch (std::out_of_range&) {
throw no_such_keyspace(name);
}
}
@@ -1024,18 +1029,20 @@ std::vector<lw_shared_ptr<column_family>> database::get_non_system_column_famili
}
column_family& database::find_column_family(std::string_view ks_name, std::string_view cf_name) {
auto uuid = find_uuid(ks_name, cf_name);
try {
return find_column_family(find_uuid(ks_name, cf_name));
} catch (...) {
std::throw_with_nested(no_such_column_family(ks_name, cf_name));
return find_column_family(uuid);
} catch (no_such_column_family&) {
on_internal_error(dblog, fmt::format("find_column_family {}.{}: UUID={} not found", ks_name, cf_name, uuid));
}
}
const column_family& database::find_column_family(std::string_view ks_name, std::string_view cf_name) const {
auto uuid = find_uuid(ks_name, cf_name);
try {
return find_column_family(find_uuid(ks_name, cf_name));
} catch (...) {
std::throw_with_nested(no_such_column_family(ks_name, cf_name));
return find_column_family(uuid);
} catch (no_such_column_family&) {
on_internal_error(dblog, fmt::format("find_column_family {}.{}: UUID={} not found", ks_name, cf_name, uuid));
}
}
@@ -1043,7 +1050,7 @@ column_family& database::find_column_family(const utils::UUID& uuid) {
try {
return *_column_families.at(uuid);
} catch (...) {
std::throw_with_nested(no_such_column_family(uuid));
throw no_such_column_family(uuid);
}
}
@@ -1051,7 +1058,7 @@ const column_family& database::find_column_family(const utils::UUID& uuid) const
try {
return *_column_families.at(uuid);
} catch (...) {
std::throw_with_nested(no_such_column_family(uuid));
throw no_such_column_family(uuid);
}
}
@@ -1268,10 +1275,11 @@ std::vector<view_ptr> keyspace_metadata::views() const {
}
schema_ptr database::find_schema(const sstring& ks_name, const sstring& cf_name) const {
auto uuid = find_uuid(ks_name, cf_name);
try {
return find_schema(find_uuid(ks_name, cf_name));
} catch (std::out_of_range&) {
std::throw_with_nested(no_such_column_family(ks_name, cf_name));
return find_schema(uuid);
} catch (no_such_column_family&) {
on_internal_error(dblog, fmt::format("find_schema {}.{}: UUID={} not found", ks_name, cf_name, uuid));
}
}

View File

@@ -17,7 +17,10 @@ else:
pid = run.run_with_temporary_dir(cmd)
ip = run.pid_to_ip(pid)
run.wait_for_services(pid, [lambda: check_cql(ip)])
run.wait_for_services(pid, [
lambda: run.check_rest_api(ip),
lambda: check_cql(ip)
])
success = run.run_pytest(sys.path[0], ['--host', ip] + sys.argv[1:])
run.summary = 'Scylla tests pass' if success else 'Scylla tests failure'

View File

@@ -8,6 +8,7 @@ import shutil
import signal
import atexit
import tempfile
import requests
# run_with_temporary_dir() is a utility function for running a process, such
# as Scylla, Cassandra or Redis, inside its own new temporary directory,
@@ -260,6 +261,17 @@ def check_ssl_cql(ip):
ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
check_cql(ip, ssl_context)
# Test that the Scylla REST API is serving.
# Can be used as a checker function with wait_for_services() below.
def check_rest_api(ip, port=10000):
try:
requests.get(f"http://{ip}:{port}/")
# getting "/" returns error status 404 but we don't care
# as long as the server returns it
except requests.exceptions.ConnectionError:
raise NotYetUp
# Any other exception may indicate a problem, and is passed to the caller.
# wait_for_services() waits for scylla to finish booting successfully and
# listen to services checked by the given "checkers". Raises an exception
# if we know Scylla did not boot properly (as soon as we know - not waiting

35
test/rest_api/README.md Normal file
View File

@@ -0,0 +1,35 @@
Tests for the Scylla REST API.
Tests use the requests library and the pytest frameworks
(both are available from Linux distributions, or with "pip install").
To run all tests using test.py, just run `./test.py api/run`.
To run all tests against an already-running local installation of Scylla,
just run `pytest`. The "--host" and "--api-port"
can be used to give a different location for the running Scylla.
More conveniently, we have a script - "run" - which
does all the work necessary to start Scylla,
and run the tests on it. The Scylla process is run in a
temporary directory which is automatically deleted when the test ends.
"run" automatically picks the most recently compiled version of Scylla in
`build/*/scylla` - but this choice of Scylla executable can be overridden with
the `SCYLLA` environment variable.
Additional options can be passed to "pytest" or to "run"
to control which tests to run:
* To run all tests in a single file, do `pytest test_system.py`.
* To run a single specific test, do `pytest test_system.py::test_system_uptime_ms`.
* To run the same test or tests 100 times, add the `--count=100` option.
This is faster than running `run` 100 times, because Scylla is only run
once, and also counts for you how many of the runs failed.
For `pytest` to support the `--count` option, you need to install a
pytest extension: `pip install pytest-repeat`
Additional useful pytest options, especially useful for debugging tests:
* -v: show the names of each individual test running instead of just dots.
* -s: show the full output of running tests (by default, pytest captures the test's output and only displays it if a test fails)

121
test/rest_api/conftest.py Normal file
View File

@@ -0,0 +1,121 @@
# Copyright 2021-present ScyllaDB
#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
# This file configures pytest for all tests in this directory, and also
# defines common test fixtures for all of them to use. A "fixture" is some
# setup which an invididual test requires to run; The fixture has setup code
# and teardown code, and if multiple tests require the same fixture, it can
# be set up only once - while still allowing the user to run individual tests
# and automatically setting up the fixtures they need.
import pytest
import requests
import ssl
import sys
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster, ConsistencyLevel, ExecutionProfile, EXEC_PROFILE_DEFAULT
from cassandra.policies import RoundRobinPolicy
# Use the util.py library from ../cql-pytest:
sys.path.insert(1, sys.path[0] + '/../cql-pytest')
from util import unique_name
# By default, tests run against a Scylla server listening
# on localhost:9042 for CQL and localhost:10000 for the REST API.
# Add the --host, --port, --ssl, or --api-port options to allow overiding these defaults.
def pytest_addoption(parser):
parser.addoption('--host', action='store', default='localhost',
help='Scylla server host to connect to')
parser.addoption('--port', action='store', default='9042',
help='Scylla CQL port to connect to')
parser.addoption('--ssl', action='store_true',
help='Connect to CQL via an encrypted TLSv1.2 connection')
parser.addoption('--api-port', action='store', default='10000',
help='server REST API port to connect to')
class RestApiSession:
def __init__(self, host, port):
self.host = host
self.port = port
self.session = requests.Session()
def send(self, method, path, params={}):
url=f"http://{self.host}:{self.port}/{path}"
if params:
sep = '?'
for key, value in params.items():
url += f"{sep}{key}={value}"
sep = '&'
req = self.session.prepare_request(requests.Request(method, url))
return self.session.send(req)
# "api" fixture: set up client object for communicating with Scylla API.
# The host/port combination of the server are determined by the --host and
# --port options, and defaults to localhost and 10000, respectively.
# We use scope="session" so that all tests will reuse the same client object.
@pytest.fixture(scope="session")
def rest_api(request):
host = request.config.getoption('host')
port = request.config.getoption('api_port')
return RestApiSession(host, port)
# "cql" fixture: set up client object for communicating with the CQL API.
# The host/port combination of the server are determined by the --host and
# --port options, and defaults to localhost and 9042, respectively.
# We use scope="session" so that all tests will reuse the same client object.
@pytest.fixture(scope="session")
def cql(request):
profile = ExecutionProfile(
load_balancing_policy=RoundRobinPolicy(),
consistency_level=ConsistencyLevel.LOCAL_QUORUM,
serial_consistency_level=ConsistencyLevel.LOCAL_SERIAL,
# The default timeout (in seconds) for execute() commands is 10, which
# should have been more than enough, but in some extreme cases with a
# very slow debug build running on a very busy machine and a very slow
# request (e.g., a DROP KEYSPACE needing to drop multiple tables)
# 10 seconds may not be enough, so let's increase it. See issue #7838.
request_timeout = 120)
if request.config.getoption('ssl'):
# Scylla does not support any earlier TLS protocol. If you try,
# you will get mysterious EOF errors (see issue #6971) :-(
ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
else:
ssl_context = None
cluster = Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: profile},
contact_points=[request.config.getoption('host')],
port=request.config.getoption('port'),
# TODO: make the protocol version an option, to allow testing with
# different versions. If we drop this setting completely, it will
# mean pick the latest version supported by the client and the server.
protocol_version=4,
# Use the default superuser credentials, which work for both Scylla and Cassandra
auth_provider=PlainTextAuthProvider(username='cassandra', password='cassandra'),
ssl_context=ssl_context,
)
return cluster.connect()
# Until Cassandra 4, NetworkTopologyStrategy did not support the option
# replication_factor (https://issues.apache.org/jira/browse/CASSANDRA-14303).
# We want to allow these tests to run on Cassandra 3.* (for the convenience
# of developers who happen to have it installed), so we'll use the older
# syntax that needs to specify a DC name explicitly. For this, will have
# a "this_dc" fixture to figure out the name of the current DC, so it can be
# used in NetworkTopologyStrategy.
@pytest.fixture(scope="session")
def this_dc(cql):
yield cql.execute("SELECT data_center FROM system.local").one()[0]

4
test/rest_api/pytest.ini Normal file
View File

@@ -0,0 +1,4 @@
# Pytest configuration file. If we don't have one in this directory,
# pytest will look for one in our ancestor directories, and may find
# something irrelevant. So we should have one here, even if empty.
[pytest]

35
test/rest_api/run Executable file
View File

@@ -0,0 +1,35 @@
#!/usr/bin/env python3
import sys
# Use the run.py library from ../cql-pytest:
sys.path.insert(1, sys.path[0] + '/../cql-pytest')
import run
print('Scylla is: ' + run.scylla + '.')
ssl = '--ssl' in sys.argv
if ssl:
cmd = run.run_scylla_ssl_cql_cmd
check_cql = run.check_ssl_cql
else:
cmd = run.run_scylla_cmd
check_cql = run.check_cql
pid = run.run_with_temporary_dir(cmd)
ip = run.pid_to_ip(pid)
run.wait_for_services(pid, [
lambda: run.check_rest_api(ip),
lambda: run.check_cql(ip)
])
success = run.run_pytest(sys.path[0], ['--host', ip] + sys.argv[1:])
run.summary = 'Scylla tests pass' if success else 'Scylla tests failure'
exit(0 if success else 1)
# Note that the run.cleanup_all() function runs now, just like on any exit
# for any reason in this script. It will delete the temporary files and
# announce the failure or success of the test (printing run.summary).

1
test/rest_api/suite.yaml Normal file
View File

@@ -0,0 +1 @@
type: Run

View File

@@ -0,0 +1,87 @@
# Copyright 2021-present ScyllaDB
#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
import pytest
import sys
import requests
# Use the util.py library from ../cql-pytest:
sys.path.insert(1, sys.path[0] + '/../cql-pytest')
from util import unique_name, new_test_table
# "keyspace" function: Creates and returns a temporary keyspace to be
# used in tests that need a keyspace. The keyspace is created with RF=1,
def new_keyspace(cql, this_dc):
name = unique_name()
cql.execute(f"CREATE KEYSPACE {name} WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{this_dc}' : 1 }}")
return name
def test_storage_service_auto_compaction_keyspace(cql, this_dc, rest_api):
keyspace = new_keyspace(cql, this_dc)
# test empty keyspace
resp = rest_api.send("DELETE", f"storage_service/auto_compaction/{keyspace}")
resp.raise_for_status()
resp = rest_api.send("POST", f"storage_service/auto_compaction/{keyspace}")
resp.raise_for_status()
# test non-empty keyspace
with new_test_table(cql, keyspace, "a int, PRIMARY KEY (a)") as t:
resp = rest_api.send("DELETE", f"storage_service/auto_compaction/{keyspace}")
resp.raise_for_status()
resp = rest_api.send("POST", f"storage_service/auto_compaction/{keyspace}")
resp.raise_for_status()
# non-existing keyspace
resp = rest_api.send("POST", f"storage_service/auto_compaction/XXX")
assert resp.status_code == requests.codes.bad_request
cql.execute(f"DROP KEYSPACE {keyspace}")
def test_storage_service_auto_compaction_table(cql, this_dc, rest_api):
keyspace = new_keyspace(cql, this_dc)
with new_test_table(cql, keyspace, "a int, PRIMARY KEY (a)") as t:
test_table = t.split('.')[1]
resp = rest_api.send("DELETE", f"storage_service/auto_compaction/{keyspace}", { "cf": test_table })
resp.raise_for_status()
resp = rest_api.send("POST", f"storage_service/auto_compaction/{keyspace}", { "cf": test_table })
resp.raise_for_status()
# non-existing table
resp = rest_api.send("POST", f"storage_service/auto_compaction/{keyspace}", { "cf": "XXX" })
assert resp.status_code == requests.codes.bad_request
cql.execute(f"DROP KEYSPACE {keyspace}")
def test_storage_service_auto_compaction_tables(cql, this_dc, rest_api):
keyspace = new_keyspace(cql, this_dc)
with new_test_table(cql, keyspace, "a int, PRIMARY KEY (a)") as t0:
with new_test_table(cql, keyspace, "a int, PRIMARY KEY (a)") as t1:
test_tables = [t0.split('.')[1], t1.split('.')[1]]
resp = rest_api.send("DELETE", f"storage_service/auto_compaction/{keyspace}", { "cf": f"{test_tables[0]},{test_tables[1]}" })
resp.raise_for_status()
resp = rest_api.send("POST", f"storage_service/auto_compaction/{keyspace}", { "cf": f"{test_tables[0]},{test_tables[1]}" })
resp.raise_for_status()
# non-existing table
resp = rest_api.send("POST", f"storage_service/auto_compaction/{keyspace}", { "cf": f"{test_tables[0]},XXX" })
assert resp.status_code == requests.codes.bad_request
cql.execute(f"DROP KEYSPACE {keyspace}")

View File

@@ -0,0 +1,20 @@
# Copyright 2021-present ScyllaDB
#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
def test_system_uptime_ms(rest_api):
resp = rest_api.send('GET', "system/uptime_ms")
resp.raise_for_status()