Compare commits

...

3 Commits

Author SHA1 Message Date
Piotr Smaron
bf6b690adc cql3: allow IS NOT NULL restrictions on non-PK columns in MVs
Create view statement validation logic was too strict, rejecting IS NOT NULL
restrictions on columns that are not part of the view's primary key.
This patch relaxes the check to explicitly allow IS NOT NULL restrictions
on non-PK columns, enabling views that filter source rows based on the
presence of specific columns.
Fixes an issue where valid Materialized Views using WHERE col IS NOT NULL
were rejected if col was a regular column.
2026-01-22 14:26:35 +01:00
Piotr Smaron
f21b7953a2 cql3: implementations of IS NULL/IS NOT NULL in statement restrictions
This patch incorporates IS NULL and IS NOT NULL restrictions into the
statement restrictions logic.
Key changes:
- Logic added to handle IS and IS_NOT operators in add_restriction and
  related functions.
- IS NULL/IS NOT NULL are treated as restrictions that require filtering
  (unless potentially optimized in future, currently they are filters).
- Added separate handling for IS NULL/IS NOT NULL to verify they are
  single-column restrictions and check compatibility with other restrictions.
- Updated possible_lhs_values to return unbounded set for IS/IS NOT
  operators, as they don't restrict the range of values in a way that
  can be used for clustering key slicing (they are presence checks).
2026-01-19 15:26:17 +01:00
Piotr Smaron
aa07a1b3e8 cql3: Add IS NULL/IS NOT NULL support to expression layer
This patch adds the core support for IS NULL and IS NOT NULL operators
in the expression layer.
Key changes:
- expression.hh: Added IS and IS_NOT to oper_t enum.
- Cql.g: Updated grammar to parse IS NULL and IS NOT NULL.
- expression.cc: Implemented evaluation logic for IS and IS_NOT.
  IS NULL returns true if the operand is null (missing).
  IS NOT NULL returns true if the operand is present.
- prepare_expr.cc: Updated expression preparation to handle the new
  operators, ensuring the RHS is explicitly NULL.
- restrictions.cc: Added validation to ensure IS/IS NOT are only used
  with NULL.
- expr_test.cc: Added unit tests for the new operators.
2026-01-19 15:19:44 +01:00
12 changed files with 476 additions and 60 deletions

View File

@@ -1889,6 +1889,8 @@ relation returns [uexpression e]
}
| name=cident K_IS K_NOT K_NULL {
$e = binary_operator(unresolved_identifier{std::move(name)}, oper_t::IS_NOT, make_untyped_null()); }
| name=cident K_IS K_NULL {
$e = binary_operator(unresolved_identifier{std::move(name)}, oper_t::IS, make_untyped_null()); }
| name=cident K_IN marker1=marker
{ $e = binary_operator(unresolved_identifier{std::move(name)}, oper_t::IN, std::move(marker1)); }
| name=cident K_IN in_values=singleColumnInValues

View File

@@ -326,7 +326,8 @@ bool_or_null limits(const expression& lhs, oper_t op, null_handling_style null_h
case oper_t::CONTAINS:
case oper_t::CONTAINS_KEY:
case oper_t::LIKE:
case oper_t::IS_NOT: // IS_NOT doesn't really belong here, luckily this is never reached.
case oper_t::IS:
case oper_t::IS_NOT: // IS and IS_NOT operators are handled separately in their dedicated evaluation functions
throw exceptions::invalid_request_exception(fmt::format("Invalid comparison with null for operator \"{}\"", op));
case oper_t::EQ:
return !sides_bytes.first == !sides_bytes.second;
@@ -557,6 +558,15 @@ bool is_not_null(const expression& lhs, const expression& rhs, const evaluation_
return !lhs_val.is_null();
}
bool is_null(const expression& lhs, const expression& rhs, const evaluation_inputs& inputs) {
cql3::raw_value lhs_val = evaluate(lhs, inputs);
cql3::raw_value rhs_val = evaluate(rhs, inputs);
if (!rhs_val.is_null()) {
throw exceptions::invalid_request_exception("IS operator accepts only NULL as its right side");
}
return lhs_val.is_null();
}
} // anonymous namespace
bool is_satisfied_by(const expression& restr, const evaluation_inputs& inputs) {
@@ -1128,6 +1138,9 @@ cql3::raw_value do_evaluate(const binary_operator& binop, const evaluation_input
case oper_t::NOT_IN:
binop_result = is_none_of(binop.lhs, binop.rhs, inputs, binop.null_handling);
break;
case oper_t::IS:
binop_result = is_null(binop.lhs, binop.rhs, inputs);
break;
case oper_t::IS_NOT:
binop_result = is_not_null(binop.lhs, binop.rhs, inputs);
break;
@@ -2526,6 +2539,8 @@ std::string_view fmt::formatter<cql3::expr::oper_t>::to_string(const cql3::expr:
return "CONTAINS";
case oper_t::CONTAINS_KEY:
return "CONTAINS KEY";
case oper_t::IS:
return "IS";
case oper_t::IS_NOT:
return "IS NOT";
case oper_t::LIKE:

View File

@@ -221,7 +221,7 @@ const column_value& get_subscripted_column(const subscript&);
/// Only columns can be subscripted in CQL, so we can expect that the subscripted expression is a column_value.
const column_value& get_subscripted_column(const expression&);
enum class oper_t { EQ, NEQ, LT, LTE, GTE, GT, IN, NOT_IN, CONTAINS, CONTAINS_KEY, IS_NOT, LIKE };
enum class oper_t { EQ, NEQ, LT, LTE, GTE, GT, IN, NOT_IN, CONTAINS, CONTAINS_KEY, IS, IS_NOT, LIKE };
/// Describes the nature of clustering-key comparisons. Useful for implementing SCYLLA_CLUSTERING_BOUND.
enum class comparison_order : char {

View File

@@ -1803,14 +1803,14 @@ binary_operator prepare_binary_operator(binary_operator binop, data_dictionary::
lw_shared_ptr<column_specification> rhs_receiver = get_rhs_receiver(lhs_receiver, binop.op);
expression prepared_rhs = prepare_expression(binop.rhs, db, table_schema.ks_name(), &table_schema, rhs_receiver);
// IS NOT NULL requires an additional check that the RHS is NULL.
// Otherwise things like `int_col IS NOT 123` would be allowed - the types match, but the value is wrong.
if (binop.op == oper_t::IS_NOT) {
// IS NULL and IS NOT NULL require an additional check that the RHS is NULL.
// Otherwise things like `int_col IS 123` or `int_col IS NOT 123` would be allowed - the types match, but the value is wrong.
if (binop.op == oper_t::IS || binop.op == oper_t::IS_NOT) {
bool rhs_is_null = is<constant>(prepared_rhs) && as<constant>(prepared_rhs).is_null();
if (!rhs_is_null) {
throw exceptions::invalid_request_exception(format(
"IS NOT NULL is the only expression that is allowed when using IS NOT. Invalid binary operator: {:user}",
"IS NULL and IS NOT NULL are the only expressions that are allowed when using IS/IS NOT. Invalid binary operator: {:user}",
binop));
}
}

View File

@@ -121,11 +121,11 @@ void preliminary_binop_vaidation_checks(const binary_operator& binop) {
throw exceptions::invalid_request_exception(format("Unsupported \"!=\" relation: {:user}", binop));
}
if (binop.op == oper_t::IS_NOT) {
if (binop.op == oper_t::IS || binop.op == oper_t::IS_NOT) {
bool rhs_is_null = (is<untyped_constant>(binop.rhs) && as<untyped_constant>(binop.rhs).partial_type == untyped_constant::type_class::null)
|| (is<constant>(binop.rhs) && as<constant>(binop.rhs).is_null());
if (!rhs_is_null) {
throw exceptions::invalid_request_exception(format("Unsupported \"IS NOT\" relation: {:user}", binop));
throw exceptions::invalid_request_exception(format("Unsupported \"IS\" or \"IS NOT\" relation: {:user}", binop));
}
}

View File

@@ -99,7 +99,8 @@ extern bool index_supports_some_column(
inline bool needs_filtering(oper_t op) {
return (op == oper_t::CONTAINS) || (op == oper_t::CONTAINS_KEY) || (op == oper_t::LIKE) ||
(op == oper_t::IS_NOT) || (op == oper_t::NEQ) || (op == oper_t::NOT_IN);
(op == oper_t::NEQ) || (op == oper_t::NOT_IN) ||
(op == oper_t::IS) || (op == oper_t::IS_NOT); // null-checking operators
}
inline auto find_needs_filtering(const expression& e) {
@@ -272,6 +273,12 @@ static value_set possible_lhs_values(const column_definition* cdef,
return empty_value_set; // All NULL comparisons fail; no column values match.
}
return value_set(value_list{*val});
} else if (oper.op == oper_t::IS || oper.op == oper_t::IS_NOT) {
// IS NULL and IS NOT NULL are special - they don't restrict to specific values
// IS NULL matches when column is unset, IS NOT NULL matches when column is set
// For value_set purposes, we return unbounded since these operators filter
// based on presence/absence rather than value comparison
return unbounded_value_set;
}
throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper));
},
@@ -1080,18 +1087,24 @@ statement_restrictions::statement_restrictions(data_dictionary::database db,
expr::binary_operator prepared_restriction = expr::validate_and_prepare_new_restriction(*relation_binop, db, schema, ctx);
add_restriction(prepared_restriction, schema, allow_filtering, for_view);
if (prepared_restriction.op != expr::oper_t::IS_NOT) {
if (prepared_restriction.op != expr::oper_t::IS_NOT && prepared_restriction.op != expr::oper_t::IS) {
_where = _where.has_value() ? make_conjunction(std::move(*_where), prepared_restriction) : prepared_restriction;
}
}
// Extract single-column restrictions from all restriction categories
// Note: This must be done even if _where is empty, because IS NULL/IS NOT NULL
// restrictions are not added to _where but are still in the category restrictions
if (!has_token_restrictions()) {
_single_column_partition_key_restrictions = get_single_column_restrictions_map(_partition_key_restrictions);
}
if (!contains_multi_column_restriction(_clustering_columns_restrictions)) {
_single_column_clustering_key_restrictions = get_single_column_restrictions_map(_clustering_columns_restrictions);
}
_single_column_nonprimary_key_restrictions = get_single_column_restrictions_map(_nonprimary_key_restrictions);
// Extract clustering prefix and partition range restrictions from _where
// These require _where to have a value
if (_where.has_value()) {
if (!has_token_restrictions()) {
_single_column_partition_key_restrictions = get_single_column_restrictions_map(_partition_key_restrictions);
}
if (!contains_multi_column_restriction(_clustering_columns_restrictions)) {
_single_column_clustering_key_restrictions = get_single_column_restrictions_map(_clustering_columns_restrictions);
}
_single_column_nonprimary_key_restrictions = get_single_column_restrictions_map(_nonprimary_key_restrictions);
_clustering_prefix_restrictions = extract_clustering_prefix_restrictions(*_where, _schema);
_partition_range_restrictions = extract_partition_range(*_where, _schema);
}
@@ -1451,9 +1464,9 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr
}
void statement_restrictions::add_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view) {
if (restr.op == expr::oper_t::IS_NOT) {
// Handle IS NOT NULL restrictions separately
add_is_not_restriction(restr, schema, for_view);
if (restr.op == expr::oper_t::IS || restr.op == expr::oper_t::IS_NOT) {
// Handle IS NULL and IS NOT NULL restrictions separately
add_is_null_restriction(restr, schema, for_view);
} else if (is_multi_column(restr)) {
// Multi column restrictions are only allowed on clustering columns
add_multi_column_clustering_key_restriction(restr);
@@ -1474,23 +1487,36 @@ void statement_restrictions::add_restriction(const expr::binary_operator& restr,
}
}
void statement_restrictions::add_is_not_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view) {
void statement_restrictions::add_is_null_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view) {
const expr::column_value* lhs_col_def = expr::as_if<expr::column_value>(&restr.lhs);
// The "IS NOT NULL" restriction is only supported (and
// mandatory) for materialized view creation:
if (lhs_col_def == nullptr) {
throw exceptions::invalid_request_exception("IS NOT only supports single column");
throw exceptions::invalid_request_exception("IS NULL and IS NOT NULL only support single column");
}
// currently, the grammar only allows the NULL argument to be
// "IS NOT", so this assertion should not be able to fail
// "IS" or "IS NOT", so this assertion should not be able to fail
if (!expr::is<expr::constant>(restr.rhs) || !expr::as<expr::constant>(restr.rhs).is_null()) {
throw exceptions::invalid_request_exception("Only IS NOT NULL is supported");
throw exceptions::invalid_request_exception("Only IS NULL and IS NOT NULL are supported");
}
_not_null_columns.insert(lhs_col_def->col);
// For IS NOT NULL, track the column in _not_null_columns (needed for views)
if (restr.op == expr::oper_t::IS_NOT) {
_not_null_columns.insert(lhs_col_def->col);
}
if (!for_view) {
throw exceptions::invalid_request_exception(format("restriction '{}' is only supported in materialized view creation", restr));
// For materialized views, IS NOT NULL is mandatory on primary key columns
if (for_view && restr.op != expr::oper_t::IS_NOT) {
throw exceptions::invalid_request_exception(format("Restriction '{}' is not supported in materialized view creation. Only IS NOT NULL is allowed.", restr));
}
// Add the restriction to the appropriate category based on column type
// This ensures proper filtering checks are applied
const column_definition* def = lhs_col_def->col;
if (def->is_partition_key()) {
_partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr);
} else if (def->is_clustering_key()) {
_clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr);
} else {
_nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, restr);
}
}

View File

@@ -277,7 +277,7 @@ public:
private:
std::pair<std::optional<secondary_index::index>, expr::expression> do_find_idx(const secondary_index::secondary_index_manager& sim) const;
void add_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view);
void add_is_not_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view);
void add_is_null_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view);
void add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view);
void add_token_partition_key_restriction(const expr::binary_operator& restr);
void add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering);

View File

@@ -39,6 +39,25 @@ namespace cql3 {
namespace statements {
static bool is_only_is_not_null(const expr::expression& e) {
if (expr::is<expr::binary_operator>(e)) {
return expr::as<expr::binary_operator>(e).op == expr::oper_t::IS_NOT;
}
if (expr::is<expr::conjunction>(e)) {
const auto& children = expr::as<expr::conjunction>(e).children;
if (children.empty()) {
return true;
}
for (const auto& child : children) {
if (!is_only_is_not_null(child)) {
return false;
}
}
return true;
}
return false;
}
create_view_statement::create_view_statement(
cf_name view_name,
cf_name base_name,
@@ -295,23 +314,31 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(
}
// The unique feature of a filter by a non-key column is that the
// value of such column can be updated - and also be expired with TTL
// value of such a column can be updated - and also be expired with TTL
// and cause the view row to appear and disappear. We don't currently
// support support this case - see issue #3430, and neither does
// Cassandra - see see CASSANDRA-13798 and CASSANDRA-13832.
// support this case - see issue #3430, and neither does
// Cassandra - see CASSANDRA-13798 and CASSANDRA-13832.
// Actually, as CASSANDRA-13798 explains, the problem is "the liveness of
// view row is now depending on multiple base columns (multiple filtered
// non-pk base column + base column used in view pk)". When the filtered
// column *is* the base column added to the view pk, we don't have this
// problem. And this case actually works correctly.
const expr::single_column_restrictions_map& non_pk_restrictions = restrictions->get_non_pk_restriction();
if (non_pk_restrictions.size() == 1 && has_non_pk_column &&
target_primary_keys.contains(non_pk_restrictions.cbegin()->first)) {
// This case (filter by new PK column of the view) works, as explained above
} else if (!non_pk_restrictions.empty()) {
std::vector<const column_definition*> illegal_non_pk_restrictions;
for (const auto& [col, expr] : non_pk_restrictions) {
if (target_primary_keys.contains(col)) {
continue;
}
if (is_only_is_not_null(expr)) {
continue;
}
illegal_non_pk_restrictions.push_back(col);
}
if (!illegal_non_pk_restrictions.empty()) {
throw exceptions::invalid_request_exception(seastar::format("Non-primary key columns cannot be restricted in the SELECT statement used for materialized view {} creation (got restrictions on: {})",
column_family(),
fmt::join(non_pk_restrictions | std::views::keys | std::views::transform(std::mem_fn(&column_definition::name_as_text)), ", ")));
fmt::join(illegal_non_pk_restrictions | std::views::transform(std::mem_fn(&column_definition::name_as_text)), ", ")));
}
// IS NOT NULL restrictions are handled separately from other restrictions.

View File

@@ -3124,6 +3124,22 @@ BOOST_AUTO_TEST_CASE(evaluate_binary_operator_is_not) {
BOOST_REQUIRE_EQUAL(evaluate(empty_is_not_null, evaluation_inputs{}), make_bool_raw(true));
}
BOOST_AUTO_TEST_CASE(evaluate_binary_operator_is) {
expression false_is_binop = binary_operator(make_int_const(1), oper_t::IS, constant::make_null(int32_type));
BOOST_REQUIRE_EQUAL(evaluate(false_is_binop, evaluation_inputs{}), make_bool_raw(false));
expression true_is_binop =
binary_operator(constant::make_null(int32_type), oper_t::IS, constant::make_null(int32_type));
BOOST_REQUIRE_EQUAL(evaluate(true_is_binop, evaluation_inputs{}), make_bool_raw(true));
expression forbidden_is_binop = binary_operator(make_int_const(1), oper_t::IS, make_int_const(2));
BOOST_REQUIRE_THROW(evaluate(forbidden_is_binop, evaluation_inputs{}), exceptions::invalid_request_exception);
expression empty_is_null =
binary_operator(make_empty_const(int32_type), oper_t::IS, constant::make_null(int32_type));
BOOST_REQUIRE_EQUAL(evaluate(empty_is_null, evaluation_inputs{}), make_bool_raw(false));
}
BOOST_AUTO_TEST_CASE(evaluate_binary_operator_like) {
expression true_like_binop = binary_operator(make_text_const("some_text"), oper_t::LIKE, make_text_const("some_%"));
BOOST_REQUIRE_EQUAL(evaluate(true_like_binop, evaluation_inputs{}), make_bool_raw(true));
@@ -3768,6 +3784,8 @@ enum struct expected_rhs_type {
float_in_list,
// list<tuple<float, int, text, double>
multi_column_tuple_in_list,
// IS allows only NULL as the RHS, everything else is invalid
is_null_rhs,
// IS_NOT allows only NULL as the RHS, everything else is invalid
is_not_null_rhs
};
@@ -4489,6 +4507,28 @@ BOOST_AUTO_TEST_CASE(prepare_binary_operator_is_not_null) {
}
}
BOOST_AUTO_TEST_CASE(prepare_binary_operator_is_null) {
schema_ptr table_schema = schema_builder("test_ks", "test_cf")
.with_column("pk", int32_type, column_kind::partition_key)
.with_column("float_col", float_type, column_kind::regular_column)
.build();
auto [db, db_data] = make_data_dictionary_database(table_schema);
for (const comparison_order& comp_order : get_possible_comparison_orders()) {
expression to_prepare =
binary_operator(unresolved_identifier{.ident = ::make_shared<column_identifier_raw>("float_col", false)},
oper_t::IS, make_null_untyped(), comp_order);
expression expected = binary_operator(column_value(table_schema->get_column_definition("float_col")),
oper_t::IS, constant::make_null(float_type), comp_order);
test_prepare_good_binary_operator(to_prepare, expected, db, table_schema);
test_prepare_binary_operator_invalid_rhs_values(to_prepare, expected_rhs_type::is_null_rhs, db,
table_schema);
}
}
// `float_col = NULL`, `float_col < NULL`, ...
// The RHS should be prepared as a NULL constant with float type.
BOOST_AUTO_TEST_CASE(prepare_binary_operator_with_null_rhs) {

252
test/cqlpy/test_is_null.py Normal file
View File

@@ -0,0 +1,252 @@
# Copyright 2025-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#############################################################################
# Tests for IS NULL and IS NOT NULL in WHERE clauses
#############################################################################
import pytest
from cassandra.protocol import InvalidRequest, SyntaxException
from .util import new_test_table, unique_name, unique_key_int
@pytest.fixture(scope="module")
def table1(cql, test_keyspace):
"""Shared table for tests with the same schema"""
table = test_keyspace + "." + unique_name()
cql.execute(f"CREATE TABLE {table} (p int, c int, v int, s text, PRIMARY KEY (p, c))")
yield table
cql.execute(f"DROP TABLE {table}")
def test_is_null_regular_column(cql, table1):
"""Test IS NULL on regular columns"""
p = unique_key_int()
# Insert some test data
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 1, 10, 'a')")
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 2, 20, 'b')")
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 3, 30, NULL)") # s is null
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 4, NULL, 'c')") # v is null
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 5, NULL, NULL)") # both v and s are null
# Test IS NULL on regular column with ALLOW FILTERING
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND v IS NULL ALLOW FILTERING"))
assert len(result) == 2
assert set((r.c for r in result)) == {4, 5}
# Test IS NULL on text column
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND s IS NULL ALLOW FILTERING"))
assert len(result) == 2
assert set((r.c for r in result)) == {3, 5}
def test_is_not_null_regular_column(cql, table1):
"""Test IS NOT NULL on regular columns"""
p = unique_key_int()
# Insert some test data
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 1, 10, 'a')")
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 2, 20, 'b')")
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 3, 30, NULL)") # s is null
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 4, NULL, 'c')") # v is null
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 5, NULL, NULL)") # both v and s are null
# Test IS NOT NULL on regular column with ALLOW FILTERING
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND v IS NOT NULL ALLOW FILTERING"))
assert len(result) == 3
assert set((r.c for r in result)) == {1, 2, 3}
# Test IS NOT NULL on text column
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND s IS NOT NULL ALLOW FILTERING"))
assert len(result) == 3
assert set((r.c for r in result)) == {1, 2, 4}
def test_is_null_with_prepared_statement(cql, table1):
"""Test IS NULL with prepared statements"""
p = unique_key_int()
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 1, 10, 'a')")
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 2, 20, NULL)") # s is null
stmt = cql.prepare(f"SELECT * FROM {table1} WHERE p = ? AND s IS NULL ALLOW FILTERING")
result = list(cql.execute(stmt, [p]))
assert len(result) == 1
assert result[0].c == 2
def test_is_not_null_with_prepared_statement(cql, table1):
"""Test IS NOT NULL with prepared statements"""
p = unique_key_int()
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 1, 10, 'a')")
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 2, 20, NULL)") # s is null
stmt = cql.prepare(f"SELECT * FROM {table1} WHERE p = ? AND s IS NOT NULL ALLOW FILTERING")
result = list(cql.execute(stmt, [p]))
assert len(result) == 1
assert result[0].c == 1
def test_is_null_combined_with_other_restrictions(cql, table1):
"""Test IS NULL combined with other WHERE conditions"""
p = unique_key_int()
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 1, 10, 'a')")
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 2, 20, NULL)") # s is null
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 3, 30, NULL)") # s is null
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 4, NULL, 'd')") # v is null
# Combine IS NULL with value comparison
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND c > 1 AND s IS NULL ALLOW FILTERING"))
assert len(result) == 2
assert set((r.c for r in result)) == {2, 3}
# Multiple IS NULL conditions
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND v IS NULL AND s IS NOT NULL ALLOW FILTERING"))
assert len(result) == 1
assert result[0].c == 4
def test_is_null_on_partition_key(cql, table1):
"""Test IS NULL and IS NOT NULL on partition key columns
Key columns can never be null, so:
- IS NULL should always return no rows
- IS NOT NULL should always be true (may not require ALLOW FILTERING)
"""
p = unique_key_int()
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 1, 10, 'a')")
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 2, 20, 'b')")
# IS NULL on partition key should return no rows (keys can never be null)
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p IS NULL ALLOW FILTERING"))
assert len(result) == 0
# IS NOT NULL on partition key should match all rows
# Since partition keys can never be null, this is always true
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p IS NOT NULL ALLOW FILTERING"))
# This returns all rows from all partitions, so we should at least see our 2 rows
assert len([r for r in result if r.p == p]) == 2
def test_is_null_on_clustering_key(cql, table1):
"""Test IS NULL and IS NOT NULL on clustering key columns
Key columns can never be null, so:
- IS NULL should always return no rows
- IS NOT NULL should always be true (may not require ALLOW FILTERING)
"""
p = unique_key_int()
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 1, 10, 'a')")
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 2, 20, 'b')")
# IS NULL on clustering key should return no rows (keys can never be null)
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND c IS NULL ALLOW FILTERING"))
assert len(result) == 0
# IS NOT NULL on clustering key with partition key specified
# Since clustering keys can never be null, this is always true
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND c IS NOT NULL ALLOW FILTERING"))
assert len(result) == 2
def test_is_null_on_compound_partition_key(cql, test_keyspace):
"""Test IS NULL and IS NOT NULL on compound partition key columns"""
with new_test_table(cql, test_keyspace, "p1 int, p2 int, c int, v int, PRIMARY KEY ((p1, p2), c)") as table:
cql.execute(f"INSERT INTO {table} (p1, p2, c, v) VALUES (1, 2, 1, 10)")
cql.execute(f"INSERT INTO {table} (p1, p2, c, v) VALUES (1, 2, 2, 20)")
# IS NULL on first partition key component should return no rows
result = list(cql.execute(f"SELECT * FROM {table} WHERE p1 IS NULL ALLOW FILTERING"))
assert len(result) == 0
# IS NULL on second partition key component should return no rows
result = list(cql.execute(f"SELECT * FROM {table} WHERE p2 IS NULL ALLOW FILTERING"))
assert len(result) == 0
# IS NOT NULL on partition key components should match all rows
result = list(cql.execute(f"SELECT * FROM {table} WHERE p1 IS NOT NULL ALLOW FILTERING"))
assert len(result) == 2
result = list(cql.execute(f"SELECT * FROM {table} WHERE p2 IS NOT NULL ALLOW FILTERING"))
assert len(result) == 2
def test_is_null_without_filtering_error(cql, table1):
"""Test that IS NULL without ALLOW FILTERING raises an error"""
p = unique_key_int()
cql.execute(f"INSERT INTO {table1} (p, c, v) VALUES ({p}, 1, 10)")
# IS NULL on regular column without ALLOW FILTERING should fail
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND v IS NULL")
def test_is_not_null_without_filtering_error(cql, table1):
"""Test that IS NOT NULL without ALLOW FILTERING raises an error"""
p = unique_key_int()
cql.execute(f"INSERT INTO {table1} (p, c, v) VALUES ({p}, 1, 10)")
# IS NOT NULL on regular column without ALLOW FILTERING should fail
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND v IS NOT NULL")
def test_is_null_with_invalid_syntax(cql, table1):
"""Test that IS NULL only accepts NULL as RHS"""
p = unique_key_int()
# IS NULL with non-null value should fail with syntax error
# (the grammar only allows IS NULL, not IS <value>)
with pytest.raises(SyntaxException):
cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND v IS 123 ALLOW FILTERING")
# IS NOT with non-null value should fail with syntax error
with pytest.raises(SyntaxException):
cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND v IS NOT 123 ALLOW FILTERING")
def test_null_equality_returns_empty(cql, table1):
"""Test that WHERE x = null returns no results (as per SQL semantics)"""
p = unique_key_int()
cql.execute(f"INSERT INTO {table1} (p, c, v, s) VALUES ({p}, 1, NULL, NULL)") # v is null
cql.execute(f"INSERT INTO {table1} (p, c, v) VALUES ({p}, 2, 10)")
# x = null should return nothing (not the same as IS NULL)
result = list(cql.execute(f"SELECT * FROM {table1} WHERE p = {p} AND v = null ALLOW FILTERING"))
assert len(result) == 0
def test_is_null_multiple_columns(cql, test_keyspace):
"""Test IS NULL on multiple columns"""
with new_test_table(cql, test_keyspace, "p int, c int, v1 int, v2 int, v3 text, PRIMARY KEY (p, c)") as table:
cql.execute(f"INSERT INTO {table} (p, c, v1, v2, v3) VALUES (1, 1, 10, 20, 'a')")
cql.execute(f"INSERT INTO {table} (p, c, v1, v2, v3) VALUES (1, 2, 10, 20, NULL)") # v3 is null
cql.execute(f"INSERT INTO {table} (p, c, v1, v2, v3) VALUES (1, 3, 10, NULL, NULL)") # v2 and v3 are null
cql.execute(f"INSERT INTO {table} (p, c, v1, v2, v3) VALUES (1, 4, NULL, NULL, NULL)") # all regular columns are null
# Test multiple IS NULL conditions
result = list(cql.execute(f"SELECT * FROM {table} WHERE p = 1 AND v2 IS NULL AND v3 IS NULL ALLOW FILTERING"))
assert len(result) == 2
assert set((r.c for r in result)) == {3, 4}
# Mix IS NULL and IS NOT NULL
result = list(cql.execute(f"SELECT * FROM {table} WHERE p = 1 AND v1 IS NOT NULL AND v2 IS NULL ALLOW FILTERING"))
assert len(result) == 1
assert result[0].c == 3
def test_is_null_empty_vs_null(cql, test_keyspace):
"""Test that empty string is not treated as NULL"""
with new_test_table(cql, test_keyspace, "p int, c int, s text, PRIMARY KEY (p, c)") as table:
cql.execute(f"INSERT INTO {table} (p, c, s) VALUES (1, 1, '')") # empty string
cql.execute(f"INSERT INTO {table} (p, c, s) VALUES (1, 2, NULL)") # null
cql.execute(f"INSERT INTO {table} (p, c, s) VALUES (1, 3, NULL)") # also null
# IS NULL should only return truly null values, not empty strings
result = list(cql.execute(f"SELECT * FROM {table} WHERE p = 1 AND s IS NULL ALLOW FILTERING"))
assert len(result) == 2
assert set((r.c for r in result)) == {2, 3}
# IS NOT NULL should include empty string
result = list(cql.execute(f"SELECT * FROM {table} WHERE p = 1 AND s IS NOT NULL ALLOW FILTERING"))
assert len(result) == 1
assert result[0].c == 1
assert result[0].s == ''

View File

@@ -300,29 +300,34 @@ def test_is_not_operator_must_be_null(cql, test_keyspace):
finally:
cql.execute(f"DROP MATERIALIZED VIEW IF EXISTS {test_keyspace}.{mv}")
# The IS NOT NULL operator was first added to Cassandra and Scylla for use
# just in key columns in materialized views. It was not supported in general
# filters in SELECT (see issue #8517), and in particular cannot be used in
# a materialized-view definition as a filter on non-key columns. However,
# if this usage is not allowed, we expect to see a clear error and not silently
# ignoring the IS NOT NULL condition as happens in issue #10365.
#
# NOTE: if issue #8517 (IS NOT NULL in filters) is implemented, we will need to
# replace this test by a test that checks that the filter works as expected,
# both in ordinary base-table SELECT and in materialized-view definition.
def test_is_not_null_forbidden_in_filter(cql, test_keyspace, cassandra_bug):
# The IS NULL and IS NOT NULL operators are now supported in WHERE clauses
# for regular SELECT queries (issue #8517). This test verifies that they work
# correctly in both regular base-table SELECT and in materialized-view definitions.
# Note: In materialized view definitions, IS NULL and IS NOT NULL are not allowed
# on non-key columns (filtering on non-key columns isn't supported at all - issue #4250).
# For key columns in view definitions, IS NOT NULL must be used (IS NULL is not allowed).
# However, both IS NULL and IS NOT NULL should work in regular WHERE clauses on base tables.
# Cassandra doesn't support this feature -
# for example, for this query executed on a regular (base) table:
# "SELECT x FROM y WHERE z IS NOT NULL ..."
# Cassandra reports an InvalidRequest: "Unsupported restriction: z IS NOT NULL"
def test_is_null_and_is_not_null_in_filter(cql, test_keyspace, scylla_only):
with new_test_table(cql, test_keyspace, 'p int primary key, xyz int') as table:
# Check that "IS NOT NULL" is not supported in a regular (base table)
# SELECT filter. Cassandra reports an InvalidRequest: "Unsupported
# restriction: xyz IS NOT NULL". In Scylla the message is different:
# "restriction '(xyz) IS NOT { null }' is only supported in materialized
# view creation".
#
with pytest.raises(InvalidRequest, match="xyz"):
cql.execute(f'SELECT * FROM {table} WHERE xyz IS NOT NULL ALLOW FILTERING')
# Check that "xyz IS NOT NULL" is also not supported in a
# materialized-view definition (where xyz is not a key column)
# Reproduces #8517
# Insert test data
cql.execute(f"INSERT INTO {table} (p, xyz) VALUES (123, 456)")
cql.execute(f"INSERT INTO {table} (p) VALUES (124)") # xyz is null
cql.execute(f"INSERT INTO {table} (p, xyz) VALUES (125, 789)")
# Test IS NOT NULL in regular SELECT - should return rows where xyz is not null
result = list(cql.execute(f'SELECT p FROM {table} WHERE xyz IS NOT NULL ALLOW FILTERING'))
assert sorted(result) == [(123,), (125,)]
# Test IS NULL in regular SELECT - should return rows where xyz is null
result = list(cql.execute(f'SELECT p FROM {table} WHERE xyz IS NULL ALLOW FILTERING'))
assert sorted(result) == [(124,)]
# Test that IS NOT NULL on non-key columns in materialized views is still
# not supported (only primary key columns can have IS NOT NULL in view definitions)
mv = unique_name()
try:
with pytest.raises(InvalidRequest, match="xyz"):

View File

@@ -2261,3 +2261,52 @@ def test_tombstone_gc_property_unnamed_index(cql, test_keyspace, scylla_only):
desc_row = cql.execute(f"DESCRIBE MATERIALIZED VIEW {table}_v_idx_index").one()
assert "tombstone_gc = {" in desc_row.create_statement
# Test that IS NULL and IS NOT NULL operators work on indexed columns
# but require ALLOW FILTERING since secondary indexes cannot efficiently
# support null checks. The materialized view backing the secondary index
# has a partition for each value of the indexed column, but does not have
# a partition for the NULL key (NULL cannot be a key in a materialized view).
# Reproducer for issue #8517
def test_is_null_with_secondary_index(cql, test_keyspace):
with new_test_table(cql, test_keyspace, "p int, c int, v int, PRIMARY KEY (p, c)") as table:
# Create secondary index on v
cql.execute(f"CREATE INDEX ON {table}(v)")
# Insert test data with some NULL values in indexed column
cql.execute(f"INSERT INTO {table} (p, c, v) VALUES (1, 1, 10)")
cql.execute(f"INSERT INTO {table} (p, c, v) VALUES (1, 2, 20)")
cql.execute(f"INSERT INTO {table} (p, c, v) VALUES (1, 3, NULL)")
cql.execute(f"INSERT INTO {table} (p, c, v) VALUES (2, 1, 10)")
cql.execute(f"INSERT INTO {table} (p, c, v) VALUES (2, 2, NULL)")
# Wait for index to be built
time.sleep(0.5)
# IS NULL on indexed column should require ALLOW FILTERING
# because the index cannot efficiently support it
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
cql.execute(f"SELECT * FROM {table} WHERE v IS NULL")
# IS NOT NULL on indexed column should also require ALLOW FILTERING
with pytest.raises(InvalidRequest, match='ALLOW FILTERING'):
cql.execute(f"SELECT * FROM {table} WHERE v IS NOT NULL")
# With ALLOW FILTERING, IS NULL should work correctly
result = list(cql.execute(f"SELECT * FROM {table} WHERE v IS NULL ALLOW FILTERING"))
assert len(result) == 2
assert set((r.p, r.c) for r in result) == {(1, 3), (2, 2)}
# With ALLOW FILTERING, IS NOT NULL should work correctly
result = list(cql.execute(f"SELECT * FROM {table} WHERE v IS NOT NULL ALLOW FILTERING"))
assert len(result) == 3
assert set((r.p, r.c) for r in result) == {(1, 1), (1, 2), (2, 1)}
# Can combine IS NULL with partition key restriction
result = list(cql.execute(f"SELECT * FROM {table} WHERE p = 1 AND v IS NULL ALLOW FILTERING"))
assert len(result) == 1
assert result[0].c == 3
# Regular EQ query on indexed column should work without ALLOW FILTERING
result = list(cql.execute(f"SELECT * FROM {table} WHERE v = 10"))
assert len(result) == 2