From 219ac2bab5dcf19ac022c4e082bbd57b7a4425da Mon Sep 17 00:00:00 2001 From: Pavel Solodovnikov Date: Wed, 23 Dec 2020 23:45:52 +0300 Subject: [PATCH] large_data_handler: fix segmentation fault when constructing `data_value` from a `nullptr` It turns out that `cql_table_large_data_handler::record_large_rows` and `cql_table_large_data_handler::record_large_cells` were broken for reporting static cells and static rows from the very beginning: In case a large static cell or a large static row is encountered, it tries to execute `db::try_record` with `nullptr` additional values, denoting that there is no clustering key to be recorded. These values are next passed to `qctx.execute_cql()`, which creates `data_value` instances for each statement parameter, hence invoking `data_value(nullptr)`. This uses `const char*` overload which delegates to `std::string_view` ctor overload. It is UB to pass `nullptr` pointer to `std::string_view` ctor. Hence leading to segmentation faults in the aforementioned large data reporting code. What we want here is to make a null `data_value` instead, so just add an overload specifically for `std::nullptr_t`, which will create a null `data_value` with `text` type. A regression test is provided for the issue (written in `cql-pytest` framework). Tests: test/cql-pytest/test_large_cells_rows.py Fixes: #6780 Signed-off-by: Pavel Solodovnikov Message-Id: <20201223204552.61081-1-pa.solodovnikov@scylladb.com> --- db/large_data_handler.cc | 4 +-- test/cql-pytest/test_large_cells_rows.py | 43 ++++++++++++++++++++++++ types.hh | 8 +++++ 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 test/cql-pytest/test_large_cells_rows.py diff --git a/db/large_data_handler.cc b/db/large_data_handler.cc index c043eff204..1cfabad366 100644 --- a/db/large_data_handler.cc +++ b/db/large_data_handler.cc @@ -155,7 +155,7 @@ future<> cql_table_large_data_handler::record_large_cells(const sstables::sstabl auto ck_str = key_to_str(*clustering_key, s); return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("{} {}", ck_str, column_name), extra_fields, ck_str, column_name); } else { - return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, nullptr, column_name); + return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, data_value::make_null(utf8_type), column_name); } } @@ -167,7 +167,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable std::string ck_str = key_to_str(*clustering_key, s); return try_record("row", sst, partition_key, int64_t(row_size), "row", ck_str, extra_fields, ck_str); } else { - return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, nullptr); + return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type)); } } diff --git a/test/cql-pytest/test_large_cells_rows.py b/test/cql-pytest/test_large_cells_rows.py new file mode 100644 index 0000000000..3a40e5aa38 --- /dev/null +++ b/test/cql-pytest/test_large_cells_rows.py @@ -0,0 +1,43 @@ +# Copyright 2020 ScyllaDB +# +# This file is part of Scylla. +# +# Scylla is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Scylla is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Scylla. If not, see . + +from util import new_test_table + +import requests + +def test_create_large_static_cells_and_rows(cql, test_keyspace): + '''Test that `large_data_handler` successfully reports large static cells + and static rows and this doesn't cause a crash of Scylla server. + + This is a regression test for https://github.com/scylladb/scylla/issues/6780''' + schema = "pk int, ck int, user_ids set static, PRIMARY KEY (pk, ck)" + with new_test_table(cql, test_keyspace, schema) as table: + insert_stmt = cql.prepare(f"INSERT INTO {table} (pk, ck, user_ids) VALUES (?, ?, ?) USING TIMEOUT 5m") + # Default large data threshold for cells is 1 mb, for rows it is 10 mb. + # Take 10 mb cell to trigger large data reporting code both for + # static cells and static rows simultaneously. + large_set = {'x' * 1024 * 1024 * 10} + cql.execute(insert_stmt, [1, 1, large_set]) + + # REST API endpoint address for test scylla node + node_address = f'http://{cql.cluster.contact_points[0]}:10000' + # Execute force flush of test table to persistent storage, which is necessary to trigger + # `large_data_handler` execution. + table_without_ks = table[table.find('.') + 1:] # strip keyspace part from the table name + requests.post(f'{node_address}/storage_service/keyspace_flush/{test_keyspace}', params={'cf' : table_without_ks}) + # No need to check that the Scylla server is running here, since the test will + # fail automatically in case Scylla crashes. \ No newline at end of file diff --git a/types.hh b/types.hh index c1fe1538b1..c5ff17d5cb 100644 --- a/types.hh +++ b/types.hh @@ -384,6 +384,14 @@ public: data_value(const std::string&); data_value(const sstring&); + // Do not allow construction of a data_value from nullptr. The reason is + // that this is error prone, for example: it conflicts with `const char*` overload + // which tries to allocate a value from it and will cause UB. + // + // We want the null value semantics here instead. So the user will be forced + // to explicitly call `make_null()` instead. + data_value(std::nullptr_t) = delete; + data_value(ascii_native_type); data_value(bool); data_value(int8_t);