diff --git a/cql3/Cql.g b/cql3/Cql.g index 0a3de52b23..a764b45f42 100644 --- a/cql3/Cql.g +++ b/cql3/Cql.g @@ -141,6 +141,8 @@ using unwrap_uninitialized_t = typename unwrap_uninitialized::type; using uexpression = uninitialized; +inline int max_expression_nesting = 12; + } @context { @@ -150,6 +152,7 @@ using uexpression = uninitialized; listener_type* listener; dialect _dialect; + int _nesting = 0; // Keeps the names of all bind variables. For bind variables without a name ('?'), the name is nullptr. // Maps bind_index -> name. @@ -248,6 +251,24 @@ using uexpression = uninitialized; _missing_tokens.emplace_back(token); return token; } + + class nesting_guard { + int& _nesting_ref; + public: + nesting_guard(CqlParser& context) : _nesting_ref(context._nesting) { + if (++_nesting_ref > max_expression_nesting) { + context.add_recognition_error("expression nested too deeply"); + } + } + ~nesting_guard() { + --_nesting_ref; + } + nesting_guard(const nesting_guard&) = delete; + nesting_guard(nesting_guard&&) = delete; + nesting_guard& operator=(const nesting_guard&) = delete; + nesting_guard& operator=(nesting_guard&&) = delete; + }; + } @lexer::namespace{cql3_parser} @@ -426,6 +447,7 @@ selector returns [shared_ptr s] ; unaliasedSelector returns [uexpression tmp] + @init { nesting_guard guard(*this); } : ( c=cident { tmp = unresolved_identifier{std::move(c)}; } | v=value { tmp = std::move(v); } | K_COUNT '(' countArgument ')' { tmp = make_count_rows_function_expression(); } @@ -455,7 +477,13 @@ countArgument whereClause returns [uexpression clause] @init { std::vector terms; } - : e1=relation { terms.push_back(std::move(e1)); } (K_AND en=relation { terms.push_back(std::move(en)); })* + : e1=relation { terms.push_back(std::move(e1)); } + (K_AND en=relation { + terms.push_back(std::move(en)); + if (terms.size() > _dialect.max_relations_in_where_clause) { + add_recognition_error("too many relations in WHERE clause"); + } + })* { clause = conjunction{std::move(terms)}; } ; @@ -1713,6 +1741,7 @@ functionArgs returns [std::vector a] ; term returns [uexpression term1] + @init { nesting_guard guard(*this); } : v=value { $term1 = std::move(v); } | f=functionName args=functionArgs { $term1 = function_call{std::move(f), std::move(args)}; } | '(' c=comparatorType ')' t=term { $term1 = cast{.style = cast::cast_style::c, .arg = std::move(t), .type = c}; } @@ -1861,7 +1890,10 @@ relationType returns [oper_t op = oper_t{}] ; relation returns [uexpression e] - @init{ oper_t rt; } + @init{ + oper_t rt; + nesting_guard guard(*this); + } : name=cident type=relationType t=term { $e = binary_operator(unresolved_identifier{std::move(name)}, type, std::move(t)); } | K_TOKEN l=tupleOfIdentifiers type=relationType t=term diff --git a/cql3/dialect.hh b/cql3/dialect.hh index 85bc3e2dbc..de07a16cf4 100644 --- a/cql3/dialect.hh +++ b/cql3/dialect.hh @@ -9,6 +9,7 @@ namespace cql3 { struct dialect { bool duplicate_bind_variable_names_refer_to_same_variable = true; // if :a is found twice in a query, the two references are to the same variable (see #15559) + unsigned max_relations_in_where_clause = 100; // maximum number of relations in a WHERE clause bool operator==(const dialect&) const = default; }; @@ -17,6 +18,7 @@ dialect internal_dialect() { return dialect{ .duplicate_bind_variable_names_refer_to_same_variable = true, + .max_relations_in_where_clause = 100, }; } @@ -28,7 +30,7 @@ struct fmt::formatter { template auto format(const cql3::dialect& d, FormatContext& ctx) const { - return fmt::format_to(ctx.out(), "cql3::dialect{{duplicate_bind_variable_names_refer_to_same_variable={}}}", - d.duplicate_bind_variable_names_refer_to_same_variable); + return fmt::format_to(ctx.out(), "cql3::dialect{{duplicate_bind_variable_names_refer_to_same_variable={}, max_relations_in_where_clause={}}}", + d.duplicate_bind_variable_names_refer_to_same_variable, d.max_relations_in_where_clause); } }; diff --git a/db/config.cc b/db/config.cc index 9f2e9f95bb..17ad1c5b87 100644 --- a/db/config.cc +++ b/db/config.cc @@ -1580,6 +1580,8 @@ db::config::config(std::shared_ptr exts) "Use on a new, parallel algorithm for performing aggregate queries.") , cql_duplicate_bind_variable_names_refer_to_same_variable(this, "cql_duplicate_bind_variable_names_refer_to_same_variable", liveness::LiveUpdate, value_status::Used, true, "A bind variable that appears twice in a CQL query refers to a single variable (if false, no name matching is performed).") + , max_relations_in_where_clause(this, "max_relations_in_where_clause", liveness::LiveUpdate, value_status::Used, 100, + "Maximum number of relations allowed in a WHERE clause. Queries with too many relations can cause quadratic complexity.") , select_internal_page_size(this, "select_internal_page_size", liveness::LiveUpdate, value_status::Used, 10000, "SELECT statements with aggregation or GROUP BYs or a secondary index may use this page size for their internal reading data, not the page size specified in the query options.") , alternator_port(this, "alternator_port", value_status::Used, 0, "Alternator API port.") diff --git a/db/config.hh b/db/config.hh index 07dbda7e13..306417fbcd 100644 --- a/db/config.hh +++ b/db/config.hh @@ -498,6 +498,7 @@ public: named_value enable_cql_config_updates; named_value enable_parallelized_aggregation; named_value cql_duplicate_bind_variable_names_refer_to_same_variable; + named_value max_relations_in_where_clause; named_value select_internal_page_size; named_value alternator_port; diff --git a/idl/forward_cql.idl.hh b/idl/forward_cql.idl.hh index e947e2fab8..d15b12595e 100644 --- a/idl/forward_cql.idl.hh +++ b/idl/forward_cql.idl.hh @@ -16,6 +16,7 @@ namespace cql3 { struct dialect { bool duplicate_bind_variable_names_refer_to_same_variable; + unsigned max_relations_in_where_clause; }; } diff --git a/test/cqlpy/test_grammar_overflow.py b/test/cqlpy/test_grammar_overflow.py new file mode 100644 index 0000000000..9920b6e65d --- /dev/null +++ b/test/cqlpy/test_grammar_overflow.py @@ -0,0 +1,130 @@ +# Copyright 2026-present ScyllaDB +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + +# Tests for cases that overflow the grammar or cause overly complex +# expressions that later consume too much time during the analysis phase. + +import pytest +from cassandra.protocol import SyntaxException, InvalidRequest +from .util import new_test_table + + +@pytest.fixture(scope="module") +def table1(cql, test_keyspace): + with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v bigint") as table: + cql.execute(f"INSERT INTO {table} (p, v) VALUES (1, 1)") + yield table + + +def nested_function_selector(depth): + """Build blobasbigint(bigintasblob(blobasbigint(bigintasblob(... v ...)))) + + Uses alternating bigintasblob/blobasbigint so that every level is + type-correct: bigint -> blob -> bigint -> blob -> ... + """ + depth //= 2 + return "blobasbigint(bigintasblob(" * depth + "v" + "))" * depth + +def nested_cast_selector(depth): + """Build CAST(CAST(CAST(... CAST(v AS bigint) ... AS bigint) AS bigint) AS bigint)""" + return f"CAST(" * depth + "v" + f" AS bigint)" * depth + +def nested_function_term(depth): + """Build blobasbigint(bigintasblob(... blobasbigint(bigintasblob((bigint)1)) ...)) + + The innermost expression is (bigint)1 (a bigint value). + Even depths end with blob type, odd depths end with bigint type. + We always make the outermost return bigint so it matches the column type. + """ + depth //= 2 + return "blobasbigint(bigintasblob(" * depth + "(bigint)1" + "))" * depth + +def nested_c_cast_term(depth): + """Build (bigint)(bigint)...(bigint)1""" + return "(bigint)" * depth + "1" + +def nested_relation(depth): + """Build (((... p=1 ...)))""" + return "(" * depth + "p=1" + ")" * depth + +# The default max_function_call_nesting is 12. Use a depth large enough +# to overflow the evaluator stack before the fix is in place. +DEPTH = 100000 + +SHALLOW_DEPTH = 10 + +def test_deeply_nested_function_in_selector(cql, table1, scylla_only): + """Deeply nested function calls in a SELECT selector must be rejected.""" + selector = nested_function_selector(DEPTH) + with pytest.raises(SyntaxException): + cql.execute(f"SELECT {selector} FROM {table1}") + # see that shallow nesting is accepted + selector = nested_function_selector(SHALLOW_DEPTH) + cql.execute(f"SELECT {selector} FROM {table1}") + + +def test_deeply_nested_cast_in_selector(cql, table1, scylla_only): + """Deeply nested CAST() in a SELECT selector must be rejected.""" + selector = nested_cast_selector(DEPTH) + with pytest.raises(SyntaxException): + cql.execute(f"SELECT {selector} FROM {table1}") + # see that shallow nesting is accepted + selector = nested_cast_selector(SHALLOW_DEPTH) + cql.execute(f"SELECT {selector} FROM {table1}") + + +def test_deeply_nested_function_in_term(cql, table1, scylla_only): + """Deeply nested function calls in a WHERE term must be rejected.""" + term = nested_function_term(DEPTH) + with pytest.raises(SyntaxException): + cql.execute(f"SELECT * FROM {table1} WHERE v = {term} ALLOW FILTERING") + # see that shallow nesting is accepted + term = nested_function_term(SHALLOW_DEPTH-1) # -1 because the cast adds one more level of nesting, see nested_function_term() + cql.execute(f"SELECT * FROM {table1} WHERE v = {term} ALLOW FILTERING") + + +def test_deeply_nested_c_cast_in_term(cql, table1, scylla_only): + """Deeply nested C-style casts in a WHERE term must be rejected.""" + term = nested_c_cast_term(DEPTH) + with pytest.raises(SyntaxException): + cql.execute(f"SELECT * FROM {table1} WHERE v = {term} ALLOW FILTERING") + # see that shallow nesting is accepted + term = nested_c_cast_term(SHALLOW_DEPTH) + cql.execute(f"SELECT * FROM {table1} WHERE v = {term} ALLOW FILTERING") + +def test_deeply_nested_relation(cql, table1, scylla_only): + """Deeply nested parentheses in a WHERE relation must be rejected.""" + relation = nested_relation(DEPTH) + with pytest.raises(SyntaxException): + cql.execute(f"SELECT * FROM {table1} WHERE {relation} ALLOW FILTERING") + # see that shallow nesting is accepted + relation = nested_relation(SHALLOW_DEPTH) + cql.execute(f"SELECT * FROM {table1} WHERE {relation} ALLOW FILTERING") + +def test_lots_of_opening_paren_not_closed(cql, table1, scylla_only): + """An opening parenthesis with no closing parenthesis must be rejected.""" + with pytest.raises(SyntaxException): + cql.execute(f"SELECT * FROM {table1} WHERE " + "(" * DEPTH) + +# The default max_relations_in_where_clause is 100. +OVER_LIMIT = 200 + +def make_where_clause(n): + """Build a WHERE clause with n relations: p = 1 AND v = 1 AND v = 1 ...""" + return "p = 1" + " AND v = 1" * (n - 1) + + +def test_too_many_relations_in_where_clause(cql, table1, scylla_only): + """A WHERE clause with too many relations must be rejected.""" + where = make_where_clause(OVER_LIMIT) + with pytest.raises(SyntaxException): + cql.execute(f"SELECT * FROM {table1} WHERE {where} ALLOW FILTERING") + + +def test_reasonable_number_of_relations_allowed(cql, table1, scylla_only): + """A WHERE clause within the limit should be accepted.""" + where = make_where_clause(50) + # Should not raise - we just need it to parse successfully. + # The query itself may return no rows, that's fine. + cql.execute(f"SELECT * FROM {table1} WHERE {where} ALLOW FILTERING") diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index 150def3bce..e2ac86f7c8 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -213,6 +213,7 @@ private: cql3::dialect test_dialect() { return cql3::dialect{ .duplicate_bind_variable_names_refer_to_same_variable = _db.local().get_config().cql_duplicate_bind_variable_names_refer_to_same_variable(), + .max_relations_in_where_clause = _db.local().get_config().max_relations_in_where_clause(), }; } diff --git a/transport/controller.cc b/transport/controller.cc index 2993cfec59..1bc44776d7 100644 --- a/transport/controller.cc +++ b/transport/controller.cc @@ -266,6 +266,7 @@ future<> controller::do_start_server() { .bounce_request_smp_service_group = bounce_request_smp_service_group, .max_concurrent_requests = cfg.max_concurrent_requests_per_shard, .cql_duplicate_bind_variable_names_refer_to_same_variable = cfg.cql_duplicate_bind_variable_names_refer_to_same_variable, + .max_relations_in_where_clause = cfg.max_relations_in_where_clause, .uninitialized_connections_semaphore_cpu_concurrency = cfg.uninitialized_connections_semaphore_cpu_concurrency, .request_timeout_on_shutdown_in_seconds = cfg.request_timeout_on_shutdown_in_seconds }; diff --git a/transport/server.cc b/transport/server.cc index 4003547b35..2caaa0da62 100644 --- a/transport/server.cc +++ b/transport/server.cc @@ -1948,6 +1948,7 @@ cql3::dialect cql_server::connection::get_dialect() const { return cql3::dialect{ .duplicate_bind_variable_names_refer_to_same_variable = _server._config.cql_duplicate_bind_variable_names_refer_to_same_variable, + .max_relations_in_where_clause = _server._config.max_relations_in_where_clause, }; } diff --git a/transport/server.hh b/transport/server.hh index 2260aef822..4b220be435 100644 --- a/transport/server.hh +++ b/transport/server.hh @@ -127,6 +127,7 @@ struct cql_server_config { smp_service_group bounce_request_smp_service_group = default_smp_service_group(); utils::updateable_value max_concurrent_requests; utils::updateable_value cql_duplicate_bind_variable_names_refer_to_same_variable; + utils::updateable_value max_relations_in_where_clause; utils::updateable_value uninitialized_connections_semaphore_cpu_concurrency; utils::updateable_value request_timeout_on_shutdown_in_seconds; };