Merge 'cql3: fix stack overflow and quadratic behavior' from Avi Kivity

This series fixes two vulnerabilities:

unbounded recursion during expression evaluation with deeply nested expressions
quadratic computation with large WHERE clauses
The fixes simply bound the depth of recursion and the length of the WHERE clause.

The WHERE clause limits are configurable. Nesting is less likely to be exceeded, so not configurable.

Limits inspired by Common Expression Language:

https://github.com/google/cel-spec/blob/master/doc/langdef.md#syntax

Implementations are required to support at least:

24-32 repetitions of repeating rules
12 repetitions of recursive rules

CVE-2026-31948
CVE-2026-31947

Fixes https://scylladb.atlassian.net/browse/SCYLLADB-1003
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-1002
Fixes https://github.com/scylladb/scylladb/issues/14472

Closes scylladb/scylladb-ghsa-m4h7-g37h-mgxf#3

* github.com:scylladb/scylladb-ghsa-m4h7-g37h-mgxf:
  cql3: limit number of relations in WHERE clause
  cql3: add max_relations_in_where_clause to dialect
  test/cqlpy: add tests for WHERE clause relation count limit
  cql3: limit nesting depth of function calls and CASTs in CQL parser
  test/cqlpy: add tests for deeply nested function calls and CASTs
This commit is contained in:
Nadav Har'El
2026-06-01 22:31:56 +03:00
10 changed files with 176 additions and 4 deletions

View File

@@ -141,6 +141,8 @@ using unwrap_uninitialized_t = typename unwrap_uninitialized<T>::type;
using uexpression = uninitialized<expression>;
inline int max_expression_nesting = 12;
}
@context {
@@ -150,6 +152,7 @@ using uexpression = uninitialized<expression>;
listener_type* listener;
dialect _dialect;
int _nesting = 0;
// Keeps the names of all bind variables. For bind variables without a name ('?'), the name is nullptr.
// Maps bind_index -> name.
@@ -248,6 +251,24 @@ using uexpression = uninitialized<expression>;
_missing_tokens.emplace_back(token);
return token;
}
class nesting_guard {
int& _nesting_ref;
public:
nesting_guard(CqlParser& context) : _nesting_ref(context._nesting) {
if (++_nesting_ref > max_expression_nesting) {
context.add_recognition_error("expression nested too deeply");
}
}
~nesting_guard() {
--_nesting_ref;
}
nesting_guard(const nesting_guard&) = delete;
nesting_guard(nesting_guard&&) = delete;
nesting_guard& operator=(const nesting_guard&) = delete;
nesting_guard& operator=(nesting_guard&&) = delete;
};
}
@lexer::namespace{cql3_parser}
@@ -426,6 +447,7 @@ selector returns [shared_ptr<raw_selector> s]
;
unaliasedSelector returns [uexpression tmp]
@init { nesting_guard guard(*this); }
: ( c=cident { tmp = unresolved_identifier{std::move(c)}; }
| v=value { tmp = std::move(v); }
| K_COUNT '(' countArgument ')' { tmp = make_count_rows_function_expression(); }
@@ -455,7 +477,13 @@ countArgument
whereClause returns [uexpression clause]
@init { std::vector<expression> terms; }
: e1=relation { terms.push_back(std::move(e1)); } (K_AND en=relation { terms.push_back(std::move(en)); })*
: e1=relation { terms.push_back(std::move(e1)); }
(K_AND en=relation {
terms.push_back(std::move(en));
if (terms.size() > _dialect.max_relations_in_where_clause) {
add_recognition_error("too many relations in WHERE clause");
}
})*
{ clause = conjunction{std::move(terms)}; }
;
@@ -1713,6 +1741,7 @@ functionArgs returns [std::vector<expression> a]
;
term returns [uexpression term1]
@init { nesting_guard guard(*this); }
: v=value { $term1 = std::move(v); }
| f=functionName args=functionArgs { $term1 = function_call{std::move(f), std::move(args)}; }
| '(' c=comparatorType ')' t=term { $term1 = cast{.style = cast::cast_style::c, .arg = std::move(t), .type = c}; }
@@ -1861,7 +1890,10 @@ relationType returns [oper_t op = oper_t{}]
;
relation returns [uexpression e]
@init{ oper_t rt; }
@init{
oper_t rt;
nesting_guard guard(*this);
}
: name=cident type=relationType t=term { $e = binary_operator(unresolved_identifier{std::move(name)}, type, std::move(t)); }
| K_TOKEN l=tupleOfIdentifiers type=relationType t=term

View File

@@ -9,6 +9,7 @@ namespace cql3 {
struct dialect {
bool duplicate_bind_variable_names_refer_to_same_variable = true; // if :a is found twice in a query, the two references are to the same variable (see #15559)
unsigned max_relations_in_where_clause = 100; // maximum number of relations in a WHERE clause
bool operator==(const dialect&) const = default;
};
@@ -17,6 +18,7 @@ dialect
internal_dialect() {
return dialect{
.duplicate_bind_variable_names_refer_to_same_variable = true,
.max_relations_in_where_clause = 100,
};
}
@@ -28,7 +30,7 @@ struct fmt::formatter<cql3::dialect> {
template <typename FormatContext>
auto format(const cql3::dialect& d, FormatContext& ctx) const {
return fmt::format_to(ctx.out(), "cql3::dialect{{duplicate_bind_variable_names_refer_to_same_variable={}}}",
d.duplicate_bind_variable_names_refer_to_same_variable);
return fmt::format_to(ctx.out(), "cql3::dialect{{duplicate_bind_variable_names_refer_to_same_variable={}, max_relations_in_where_clause={}}}",
d.duplicate_bind_variable_names_refer_to_same_variable, d.max_relations_in_where_clause);
}
};

View File

@@ -1580,6 +1580,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"Use on a new, parallel algorithm for performing aggregate queries.")
, cql_duplicate_bind_variable_names_refer_to_same_variable(this, "cql_duplicate_bind_variable_names_refer_to_same_variable", liveness::LiveUpdate, value_status::Used, true,
"A bind variable that appears twice in a CQL query refers to a single variable (if false, no name matching is performed).")
, max_relations_in_where_clause(this, "max_relations_in_where_clause", liveness::LiveUpdate, value_status::Used, 100,
"Maximum number of relations allowed in a WHERE clause. Queries with too many relations can cause quadratic complexity.")
, select_internal_page_size(this, "select_internal_page_size", liveness::LiveUpdate, value_status::Used, 10000,
"SELECT statements with aggregation or GROUP BYs or a secondary index may use this page size for their internal reading data, not the page size specified in the query options.")
, alternator_port(this, "alternator_port", value_status::Used, 0, "Alternator API port.")

View File

@@ -498,6 +498,7 @@ public:
named_value<bool> enable_cql_config_updates;
named_value<bool> enable_parallelized_aggregation;
named_value<bool> cql_duplicate_bind_variable_names_refer_to_same_variable;
named_value<uint32_t> max_relations_in_where_clause;
named_value<uint32_t> select_internal_page_size;
named_value<uint16_t> alternator_port;

View File

@@ -16,6 +16,7 @@ namespace cql3 {
struct dialect {
bool duplicate_bind_variable_names_refer_to_same_variable;
unsigned max_relations_in_where_clause;
};
}

View File

@@ -0,0 +1,130 @@
# Copyright 2026-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
# Tests for cases that overflow the grammar or cause overly complex
# expressions that later consume too much time during the analysis phase.
import pytest
from cassandra.protocol import SyntaxException, InvalidRequest
from .util import new_test_table
@pytest.fixture(scope="module")
def table1(cql, test_keyspace):
with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v bigint") as table:
cql.execute(f"INSERT INTO {table} (p, v) VALUES (1, 1)")
yield table
def nested_function_selector(depth):
"""Build blobasbigint(bigintasblob(blobasbigint(bigintasblob(... v ...))))
Uses alternating bigintasblob/blobasbigint so that every level is
type-correct: bigint -> blob -> bigint -> blob -> ...
"""
depth //= 2
return "blobasbigint(bigintasblob(" * depth + "v" + "))" * depth
def nested_cast_selector(depth):
"""Build CAST(CAST(CAST(... CAST(v AS bigint) ... AS bigint) AS bigint) AS bigint)"""
return f"CAST(" * depth + "v" + f" AS bigint)" * depth
def nested_function_term(depth):
"""Build blobasbigint(bigintasblob(... blobasbigint(bigintasblob((bigint)1)) ...))
The innermost expression is (bigint)1 (a bigint value).
Even depths end with blob type, odd depths end with bigint type.
We always make the outermost return bigint so it matches the column type.
"""
depth //= 2
return "blobasbigint(bigintasblob(" * depth + "(bigint)1" + "))" * depth
def nested_c_cast_term(depth):
"""Build (bigint)(bigint)...(bigint)1"""
return "(bigint)" * depth + "1"
def nested_relation(depth):
"""Build (((... p=1 ...)))"""
return "(" * depth + "p=1" + ")" * depth
# The default max_function_call_nesting is 12. Use a depth large enough
# to overflow the evaluator stack before the fix is in place.
DEPTH = 100000
SHALLOW_DEPTH = 10
def test_deeply_nested_function_in_selector(cql, table1, scylla_only):
"""Deeply nested function calls in a SELECT selector must be rejected."""
selector = nested_function_selector(DEPTH)
with pytest.raises(SyntaxException):
cql.execute(f"SELECT {selector} FROM {table1}")
# see that shallow nesting is accepted
selector = nested_function_selector(SHALLOW_DEPTH)
cql.execute(f"SELECT {selector} FROM {table1}")
def test_deeply_nested_cast_in_selector(cql, table1, scylla_only):
"""Deeply nested CAST() in a SELECT selector must be rejected."""
selector = nested_cast_selector(DEPTH)
with pytest.raises(SyntaxException):
cql.execute(f"SELECT {selector} FROM {table1}")
# see that shallow nesting is accepted
selector = nested_cast_selector(SHALLOW_DEPTH)
cql.execute(f"SELECT {selector} FROM {table1}")
def test_deeply_nested_function_in_term(cql, table1, scylla_only):
"""Deeply nested function calls in a WHERE term must be rejected."""
term = nested_function_term(DEPTH)
with pytest.raises(SyntaxException):
cql.execute(f"SELECT * FROM {table1} WHERE v = {term} ALLOW FILTERING")
# see that shallow nesting is accepted
term = nested_function_term(SHALLOW_DEPTH-1) # -1 because the cast adds one more level of nesting, see nested_function_term()
cql.execute(f"SELECT * FROM {table1} WHERE v = {term} ALLOW FILTERING")
def test_deeply_nested_c_cast_in_term(cql, table1, scylla_only):
"""Deeply nested C-style casts in a WHERE term must be rejected."""
term = nested_c_cast_term(DEPTH)
with pytest.raises(SyntaxException):
cql.execute(f"SELECT * FROM {table1} WHERE v = {term} ALLOW FILTERING")
# see that shallow nesting is accepted
term = nested_c_cast_term(SHALLOW_DEPTH)
cql.execute(f"SELECT * FROM {table1} WHERE v = {term} ALLOW FILTERING")
def test_deeply_nested_relation(cql, table1, scylla_only):
"""Deeply nested parentheses in a WHERE relation must be rejected."""
relation = nested_relation(DEPTH)
with pytest.raises(SyntaxException):
cql.execute(f"SELECT * FROM {table1} WHERE {relation} ALLOW FILTERING")
# see that shallow nesting is accepted
relation = nested_relation(SHALLOW_DEPTH)
cql.execute(f"SELECT * FROM {table1} WHERE {relation} ALLOW FILTERING")
def test_lots_of_opening_paren_not_closed(cql, table1, scylla_only):
"""An opening parenthesis with no closing parenthesis must be rejected."""
with pytest.raises(SyntaxException):
cql.execute(f"SELECT * FROM {table1} WHERE " + "(" * DEPTH)
# The default max_relations_in_where_clause is 100.
OVER_LIMIT = 200
def make_where_clause(n):
"""Build a WHERE clause with n relations: p = 1 AND v = 1 AND v = 1 ..."""
return "p = 1" + " AND v = 1" * (n - 1)
def test_too_many_relations_in_where_clause(cql, table1, scylla_only):
"""A WHERE clause with too many relations must be rejected."""
where = make_where_clause(OVER_LIMIT)
with pytest.raises(SyntaxException):
cql.execute(f"SELECT * FROM {table1} WHERE {where} ALLOW FILTERING")
def test_reasonable_number_of_relations_allowed(cql, table1, scylla_only):
"""A WHERE clause within the limit should be accepted."""
where = make_where_clause(50)
# Should not raise - we just need it to parse successfully.
# The query itself may return no rows, that's fine.
cql.execute(f"SELECT * FROM {table1} WHERE {where} ALLOW FILTERING")

View File

@@ -213,6 +213,7 @@ private:
cql3::dialect test_dialect() {
return cql3::dialect{
.duplicate_bind_variable_names_refer_to_same_variable = _db.local().get_config().cql_duplicate_bind_variable_names_refer_to_same_variable(),
.max_relations_in_where_clause = _db.local().get_config().max_relations_in_where_clause(),
};
}

View File

@@ -266,6 +266,7 @@ future<> controller::do_start_server() {
.bounce_request_smp_service_group = bounce_request_smp_service_group,
.max_concurrent_requests = cfg.max_concurrent_requests_per_shard,
.cql_duplicate_bind_variable_names_refer_to_same_variable = cfg.cql_duplicate_bind_variable_names_refer_to_same_variable,
.max_relations_in_where_clause = cfg.max_relations_in_where_clause,
.uninitialized_connections_semaphore_cpu_concurrency = cfg.uninitialized_connections_semaphore_cpu_concurrency,
.request_timeout_on_shutdown_in_seconds = cfg.request_timeout_on_shutdown_in_seconds
};

View File

@@ -1948,6 +1948,7 @@ cql3::dialect
cql_server::connection::get_dialect() const {
return cql3::dialect{
.duplicate_bind_variable_names_refer_to_same_variable = _server._config.cql_duplicate_bind_variable_names_refer_to_same_variable,
.max_relations_in_where_clause = _server._config.max_relations_in_where_clause,
};
}

View File

@@ -127,6 +127,7 @@ struct cql_server_config {
smp_service_group bounce_request_smp_service_group = default_smp_service_group();
utils::updateable_value<uint32_t> max_concurrent_requests;
utils::updateable_value<bool> cql_duplicate_bind_variable_names_refer_to_same_variable;
utils::updateable_value<uint32_t> max_relations_in_where_clause;
utils::updateable_value<uint32_t> uninitialized_connections_semaphore_cpu_concurrency;
utils::updateable_value<uint32_t> request_timeout_on_shutdown_in_seconds;
};