Files
scylladb/test/cql-pytest/test_materialized_view.py
Nadav Har'El e8493e20cb cql-pytest: test for empty-string as partition key in materialized view
Scylla and Cassandra do not allow an empty string as a partition key,
but a materialized view might "convert" a regular string column into a
partition key, and an empty string is a perfectly valid value for this
column. This can result in a view row which has an empty string as a
partition key. This case works in Cassandra, but doesn't in Scylla (the
row with the empty string as a partition key doesn't appear). The
following test demonstrates this difference between Scylla and Cassandra
(it passes on Cassandra, fails on Scylla, and accordingly marked
"xfail").

Refs #9375.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20210922115000.290387-1-nyh@scylladb.com>
2021-09-22 18:55:25 +03:00

101 lines
5.0 KiB
Python

# Copyright 2021-present ScyllaDB
#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
# Tests for materialized views
import time
import pytest
from util import new_test_table, unique_name, new_materialized_view
from cassandra.protocol import InvalidRequest
# Test that building a view with a large value succeeds. Regression test
# for a bug where values larger than 10MB were rejected during building (#9047)
def test_build_view_with_large_row(cql, test_keyspace):
schema = 'p int, c int, v text, primary key (p,c)'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
big = 'x'*11*1024*1024
cql.execute(f"INSERT INTO {table}(p,c,v) VALUES (1,1,'{big}')")
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p IS NOT NULL AND c IS NOT NULL PRIMARY KEY (c,p)")
try:
retrieved_row = False
for _ in range(50):
res = [row for row in cql.execute(f"SELECT * FROM {test_keyspace}.{mv}")]
if len(res) == 1 and res[0].v == big:
retrieved_row = True
break
else:
time.sleep(0.1)
assert retrieved_row
finally:
cql.execute(f"DROP MATERIALIZED VIEW {test_keyspace}.{mv}")
# Test that updating a view with a large value succeeds. Regression test
# for a bug where values larger than 10MB were rejected during building (#9047)
def test_update_view_with_large_row(cql, test_keyspace):
schema = 'p int, c int, v text, primary key (p,c)'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p IS NOT NULL AND c IS NOT NULL PRIMARY KEY (c,p)")
try:
big = 'x'*11*1024*1024
cql.execute(f"INSERT INTO {table}(p,c,v) VALUES (1,1,'{big}')")
res = [row for row in cql.execute(f"SELECT * FROM {test_keyspace}.{mv}")]
assert len(res) == 1 and res[0].v == big
finally:
cql.execute(f"DROP MATERIALIZED VIEW {test_keyspace}.{mv}")
# Test that a `CREATE MATERIALIZED VIEW` request, that contains bind markers in
# its SELECT statement, fails gracefully with `InvalidRequest` exception and
# doesn't lead to a database crash.
def test_mv_select_stmt_bound_values(cql, test_keyspace):
schema = 'p int PRIMARY KEY'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
try:
with pytest.raises(InvalidRequest, match="CREATE MATERIALIZED VIEW"):
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p = ? PRIMARY KEY (p)")
finally:
cql.execute(f"DROP MATERIALIZED VIEW IF EXISTS {test_keyspace}.{mv}")
# In test_null.py::test_empty_string_key() we noticed that an empty string
# is not allowed as a partition key. However, an empty string is a valid
# value for a string column, so if we have a materialized view with this
# string column becoming the view's partition key - the empty string may end
# up being the view row's partition key! This case should be supported,
# because the "IS NOT NULL" clause in the view's declaration does not
# eliminate this row (an empty string is not considered NULL).
# This reproduces issue #9375.
@pytest.mark.xfail(reason="issue #9375")
def test_mv_empty_string_partition_key(cql, test_keyspace):
schema = 'p int, v text, primary key (p)'
with new_test_table(cql, test_keyspace, schema) as table:
with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
cql.execute(f"INSERT INTO {table} (p,v) VALUES (123, '')")
# Note that because cql-pytest runs on a single node, view
# updates are synchronous, and we can read the view immediately
# without retrying. In a general setup, this test would require
# retries.
# The view row with the empty partition key should exist.
# In #9375, this failed in Scylla:
assert list(cql.execute(f"SELECT * FROM {mv}")) == [('', 123)]
# However, it is still impossible to select just this row,
# because Cassandra forbids an empty partition key on select
with pytest.raises(InvalidRequest, match='Key may not be empty'):
cql.execute(f"SELECT * FROM {mv} WHERE v=''")