merge: Allow accessing Scylla system tables from alternator
Merged patch series from Piotr Sarna: This series allows reading rows from Scylla's system tables via alternator by using a virtual interface. If a Query or Scan request is given a table name matching the following pattern: .scylla.alternator.KEYSPACE_NAME.TABLE_NAME, it will read the data from Scylla's KEYSPACE_NAME.TABLE_NAME table. The interface is expected to only return data for Scylla system tables, and trying to access regular tables via this interface is expected to return an error. This series comes with tests (alternator-test, scylla_only). Fixes #6122 Tests: alternator-test(local, remote (to verify that scylla_only works)) Piotr Sarna (5): alternator: add fallback serialization for all types alternator: add fetching static columns if they exist alternator: add a way of accessing system tables from alternator alternator-test: add scylla-only test for querying system tables docs: add an entry about accessing Scylla system tables alternator-test/test_system_tables.py | 61 +++++++++++++++++++++++++++ alternator/executor.cc | 38 ++++++++++++++++- alternator/executor.hh | 1 + alternator/serialization.cc | 11 +++-- docs/alternator/alternator.md | 15 +++++++ 5 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 alternator-test/test_system_tables.py
This commit is contained in:
@@ -187,6 +187,25 @@ static schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& r
|
||||
}
|
||||
}
|
||||
|
||||
static std::tuple<bool, std::string_view, std::string_view> try_get_internal_table(std::string_view table_name) {
|
||||
size_t it = table_name.find(executor::INTERNAL_TABLE_PREFIX);
|
||||
if (it != 0) {
|
||||
return {false, "", ""};
|
||||
}
|
||||
table_name.remove_prefix(executor::INTERNAL_TABLE_PREFIX.size());
|
||||
size_t delim = table_name.find_first_of('.');
|
||||
if (delim == std::string_view::npos) {
|
||||
return {false, "", ""};
|
||||
}
|
||||
std::string_view ks_name = table_name.substr(0, delim);
|
||||
table_name.remove_prefix(ks_name.size() + 1);
|
||||
// Only internal keyspaces can be accessed to avoid leakage
|
||||
if (!is_internal_keyspace(sstring(ks_name))) {
|
||||
return {false, "", ""};
|
||||
}
|
||||
return {true, ks_name, table_name};
|
||||
}
|
||||
|
||||
// get_table_or_view() is similar to get_table(), except it returns either
|
||||
// a table or a materialized view from which to read, based on the TableName
|
||||
// and optional IndexName in the request. Only requests like Query and Scan
|
||||
@@ -196,6 +215,17 @@ static std::pair<schema_ptr, table_or_view_type>
|
||||
get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
|
||||
table_or_view_type type = table_or_view_type::base;
|
||||
std::string table_name = get_table_name(request);
|
||||
|
||||
auto [is_internal_table, internal_ks_name, internal_table_name] = try_get_internal_table(table_name);
|
||||
if (is_internal_table) {
|
||||
try {
|
||||
return { proxy.get_db().local().find_schema(sstring(internal_ks_name), sstring(internal_table_name)), type };
|
||||
} catch (no_such_column_family&) {
|
||||
throw api_error("ResourceNotFoundException",
|
||||
format("Requested resource not found: Internal table: {}.{} not found", internal_ks_name, internal_table_name));
|
||||
}
|
||||
}
|
||||
|
||||
std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
|
||||
const rjson::value* index_name = rjson::find(request, "IndexName");
|
||||
std::string orig_table_name;
|
||||
@@ -684,6 +714,10 @@ future<executor::request_return_type> executor::create_table(client_state& clien
|
||||
_stats.api_operations.create_table++;
|
||||
elogger.trace("Creating table {}", request);
|
||||
std::string table_name = get_table_name(request);
|
||||
if (table_name.find(INTERNAL_TABLE_PREFIX) == 0) {
|
||||
return make_ready_future<request_return_type>(api_error("ValidationException",
|
||||
format("Prefix {} is reserved for accessing internal tables", INTERNAL_TABLE_PREFIX)));
|
||||
}
|
||||
std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
|
||||
const rjson::value& attribute_definitions = request["AttributeDefinitions"];
|
||||
|
||||
@@ -2920,8 +2954,10 @@ static future<executor::request_return_type> do_query(schema_ptr schema,
|
||||
|
||||
auto regular_columns = boost::copy_range<query::column_id_vector>(
|
||||
schema->regular_columns() | boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.id; }));
|
||||
auto static_columns = boost::copy_range<query::column_id_vector>(
|
||||
schema->static_columns() | boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.id; }));
|
||||
auto selection = cql3::selection::selection::wildcard(schema);
|
||||
auto partition_slice = query::partition_slice(std::move(ck_bounds), {}, std::move(regular_columns), selection->get_query_options());
|
||||
auto partition_slice = query::partition_slice(std::move(ck_bounds), std::move(static_columns), std::move(regular_columns), selection->get_query_options());
|
||||
auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, query::max_partitions);
|
||||
|
||||
auto query_state_ptr = std::make_unique<service::query_state>(client_state, trace_state, std::move(permit));
|
||||
|
||||
@@ -50,6 +50,7 @@ public:
|
||||
stats _stats;
|
||||
static constexpr auto ATTRS_COLUMN_NAME = ":attrs";
|
||||
static constexpr auto KEYSPACE_NAME_PREFIX = "alternator_";
|
||||
static constexpr std::string_view INTERNAL_TABLE_PREFIX = ".scylla.alternator.";
|
||||
|
||||
executor(service::storage_proxy& proxy, service::migration_manager& mm, smp_service_group ssg)
|
||||
: _proxy(proxy), _mm(mm), _ssg(ssg) {}
|
||||
|
||||
@@ -153,7 +153,9 @@ std::string type_to_string(data_type type) {
|
||||
};
|
||||
auto it = types.find(type);
|
||||
if (it == types.end()) {
|
||||
throw std::runtime_error(format("Unknown type {}", type->name()));
|
||||
// fall back to string, in order to be able to present
|
||||
// internal Scylla types in a human-readable way
|
||||
return "S";
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
@@ -205,8 +207,11 @@ rjson::value json_key_column_value(bytes_view cell, const column_definition& col
|
||||
auto s = to_json_string(*decimal_type, bytes(cell));
|
||||
return rjson::from_string(s);
|
||||
} else {
|
||||
// We shouldn't get here, we shouldn't see such key columns.
|
||||
throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
|
||||
// Support for arbitrary key types is useful for parsing values of virtual tables,
|
||||
// which can involve any type supported by Scylla.
|
||||
// In order to guarantee that the returned type is parsable by alternator clients,
|
||||
// they are represented simply as strings.
|
||||
return rjson::from_string(column.type->to_string(bytes(cell)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -168,6 +168,21 @@ implemented, with the following limitations:
|
||||
Those are different from the current DynamoDB metrics, but Scylla's
|
||||
monitoring is rather advanced and provides more insight into the internals.
|
||||
|
||||
## Alternator-specific API
|
||||
|
||||
### Accessing system tables from Scylla
|
||||
* Scylla exposes lots of useful information via its internal system tables,
|
||||
which can be found in system keyspaces: 'system', 'system\_auth', etc.
|
||||
In order to access these tables via the alternator interface,
|
||||
Scan and Query requests can use a special table name:
|
||||
.scylla.alternator.KEYSPACE\_NAME.TABLE\_NAME
|
||||
which will return results fetched from the corresponding Scylla table.
|
||||
This interface can be used only to fetch data from system tables.
|
||||
Attempts to read regular tables via the virtual interface will result
|
||||
in an error.
|
||||
Example: in order to query the contents of Scylla's system.large_rows,
|
||||
pass TableName='.scylla.alternator.system.large_rows' to a Query/Scan request.
|
||||
|
||||
## Alternator design and implementation
|
||||
|
||||
This section provides only a very brief introduction to Alternator's
|
||||
|
||||
61
test/alternator/test_system_tables.py
Normal file
61
test/alternator/test_system_tables.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# Copyright 2020 ScyllaDB
|
||||
#
|
||||
# This file is part of Scylla.
|
||||
#
|
||||
# Scylla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Scylla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# Tests for accessing alternator-only system tables (from Scylla).
|
||||
|
||||
import pytest
|
||||
from botocore.exceptions import ClientError
|
||||
from boto3.dynamodb.conditions import Key
|
||||
|
||||
internal_prefix = '.scylla.alternator.'
|
||||
|
||||
# Test that fetching key columns from system tables works
|
||||
def test_fetch_from_system_tables(scylla_only, dynamodb):
    """Scan the key columns of every system table through the virtual interface.

    The list of tables comes from system_schema.tables, and the key columns of
    each table come from system_schema.columns. The test makes no assertions on
    the returned data; it only requires that every request completes without
    raising.
    """
    client = dynamodb.meta.client
    # Enumerate all (keyspace, table) pairs known to the schema tables.
    listing = client.scan(TableName=internal_prefix+'system_schema.tables',
            AttributesToGet=['keyspace_name','table_name'])

    for entry in listing['Items']:
        ks_name = entry['keyspace_name']
        table_name = entry['table_name']

        # Only keyspaces whose name contains 'system' are exercised here.
        if 'system' not in ks_name:
            continue

        # Look up the column definitions of this table to find its key columns.
        columns = client.query(TableName=internal_prefix+'system_schema.columns',
                KeyConditionExpression=Key('keyspace_name').eq(ks_name) & Key('table_name').eq(table_name))

        key_columns = []
        for column in columns['Items']:
            if column['kind'] in ('clustering', 'partition_key'):
                key_columns.append(column['column_name'])

        qualified_name = "{}{}.{}".format(internal_prefix, ks_name, table_name)
        response = client.scan(TableName=qualified_name, AttributesToGet=key_columns)
        print(ks_name, table_name, response)
|
||||
|
||||
def test_block_access_to_non_system_tables_with_virtual_interface(scylla_only, test_table_s, dynamodb):
    """Reading a regular (non-system) table through the virtual interface must fail.

    The request names the keyspace/table pair backing test_table_s behind the
    internal prefix and expects a ResourceNotFoundException whose message
    mentions that prefix.
    """
    import re

    client = dynamodb.meta.client
    qualified_name = "{}alternator_{}.{}".format(internal_prefix, test_table_s.name, test_table_s.name)
    # The prefix contains dots; escape it so that it is matched literally
    # instead of each '.' acting as a regex wildcard.
    expected_error = 'ResourceNotFoundException.*' + re.escape(internal_prefix)
    with pytest.raises(ClientError, match=expected_error):
        client.scan(TableName=qualified_name)
|
||||
|
||||
def test_block_creating_tables_with_reserved_prefix(scylla_only, dynamodb):
    """Creating a table whose name starts with the internal prefix must be rejected.

    Several suffixes are tried (including an empty one); each CreateTable call
    is expected to raise a ClientError whose message mentions the prefix.
    """
    import re

    # The prefix contains dots; escape it so that it is matched literally
    # instead of each '.' acting as a regex wildcard.
    reserved_prefix_pattern = re.escape(internal_prefix)
    for wrong_name_postfix in ['', 'a', 'xxx', 'system_auth.roles', 'table_name']:
        with pytest.raises(ClientError, match=reserved_prefix_pattern):
            dynamodb.create_table(TableName=internal_prefix+wrong_name_postfix,
                    BillingMode='PAY_PER_REQUEST',
                    KeySchema=[{'AttributeName':'p', 'KeyType':'HASH'}],
                    AttributeDefinitions=[{'AttributeName':'p', 'AttributeType': 'S'}]
            )
|
||||
|
||||
Reference in New Issue
Block a user