Files
scylladb/test/cql-pytest/test_materialized_view.py
Piotr Sarna 277aa30965 cql-pytest: extend synchronous mv test with new cases
The new cases cover:
 - a materialized view created with synchronous updates from the start
 - a materialized view created with synchronous updates,
   but then alter to not have synchronous updates anymore
2022-07-25 10:00:28 +02:00

262 lines
16 KiB
Python

# Copyright 2021-present ScyllaDB
#
# SPDX-License-Identifier: AGPL-3.0-or-later
# Tests for materialized views
import time
import pytest
from util import new_test_table, unique_name, new_materialized_view
from cassandra.protocol import InvalidRequest, SyntaxException
import nodetool
# Test that building a view with a large value succeeds. Regression test
# for a bug where values larger than 10MB were rejected during building (#9047)
def test_build_view_with_large_row(cql, test_keyspace):
schema = 'p int, c int, v text, primary key (p,c)'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
big = 'x'*11*1024*1024
cql.execute(f"INSERT INTO {table}(p,c,v) VALUES (1,1,'{big}')")
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p IS NOT NULL AND c IS NOT NULL PRIMARY KEY (c,p)")
try:
retrieved_row = False
for _ in range(50):
res = [row for row in cql.execute(f"SELECT * FROM {test_keyspace}.{mv}")]
if len(res) == 1 and res[0].v == big:
retrieved_row = True
break
else:
time.sleep(0.1)
assert retrieved_row
finally:
cql.execute(f"DROP MATERIALIZED VIEW {test_keyspace}.{mv}")
# Test that updating a view with a large value succeeds. Regression test
# for a bug where values larger than 10MB were rejected during building (#9047)
def test_update_view_with_large_row(cql, test_keyspace):
schema = 'p int, c int, v text, primary key (p,c)'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p IS NOT NULL AND c IS NOT NULL PRIMARY KEY (c,p)")
try:
big = 'x'*11*1024*1024
cql.execute(f"INSERT INTO {table}(p,c,v) VALUES (1,1,'{big}')")
res = [row for row in cql.execute(f"SELECT * FROM {test_keyspace}.{mv}")]
assert len(res) == 1 and res[0].v == big
finally:
cql.execute(f"DROP MATERIALIZED VIEW {test_keyspace}.{mv}")
# Test that a `CREATE MATERIALIZED VIEW` request, that contains bind markers in
# its SELECT statement, fails gracefully with `InvalidRequest` exception and
# doesn't lead to a database crash.
def test_mv_select_stmt_bound_values(cql, test_keyspace):
schema = 'p int PRIMARY KEY'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
try:
with pytest.raises(InvalidRequest, match="CREATE MATERIALIZED VIEW"):
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p = ? PRIMARY KEY (p)")
finally:
cql.execute(f"DROP MATERIALIZED VIEW IF EXISTS {test_keyspace}.{mv}")
# In test_null.py::test_empty_string_key() we noticed that an empty string
# is not allowed as a partition key. However, an empty string is a valid
# value for a string column, so if we have a materialized view with this
# string column becoming the view's partition key - the empty string may end
# up being the view row's partition key. This case should be supported,
# because the "IS NOT NULL" clause in the view's declaration does not
# eliminate this row (an empty string is *not* considered NULL).
# Reproduces issue #9375.
def test_mv_empty_string_partition_key(cql, test_keyspace):
schema = 'p int, v text, primary key (p)'
with new_test_table(cql, test_keyspace, schema) as table:
with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
cql.execute(f"INSERT INTO {table} (p,v) VALUES (123, '')")
# Note that because cql-pytest runs on a single node, view
# updates are synchronous, and we can read the view immediately
# without retrying. In a general setup, this test would require
# retries.
# The view row with the empty partition key should exist.
# In #9375, this failed in Scylla:
assert list(cql.execute(f"SELECT * FROM {mv}")) == [('', 123)]
# Verify that we can flush an sstable with just an one partition
# with an empty-string key (in the past we had a summary-file
# sanity check preventing this from working).
nodetool.flush(cql, mv)
# Reproducer for issue #9450 - when a view's key column name is a (quoted)
# keyword, writes used to fail because they generated internally broken CQL
# with the column name not quoted.
def test_mv_quoted_column_names(cql, test_keyspace):
for colname in ['"dog"', '"Dog"', 'DOG', '"to"', 'int']:
with new_test_table(cql, test_keyspace, f'p int primary key, {colname} int') as table:
with new_materialized_view(cql, table, '*', f'{colname}, p', f'{colname} is not null and p is not null') as mv:
cql.execute(f'INSERT INTO {table} (p, {colname}) values (1, 2)')
# Validate that not only the write didn't fail, it actually
# write the right thing to the view. NOTE: on a single-node
# Scylla, view update is synchronous so we can just read and
# don't need to wait or retry.
assert list(cql.execute(f'SELECT * from {mv}')) == [(2, 1)]
# Same as test_mv_quoted_column_names above (reproducing issue #9450), just
# check *view building* - i.e., pre-existing data in the base table that
# needs to be copied to the view. The view building cannot return an error
# to the user, but can fail to write the desired data into the view.
def test_mv_quoted_column_names_build(cql, test_keyspace):
for colname in ['"dog"', '"Dog"', 'DOG', '"to"', 'int']:
with new_test_table(cql, test_keyspace, f'p int primary key, {colname} int') as table:
cql.execute(f'INSERT INTO {table} (p, {colname}) values (1, 2)')
with new_materialized_view(cql, table, '*', f'{colname}, p', f'{colname} is not null and p is not null') as mv:
# When Scylla's view builder fails as it did in issue #9450,
# there is no way to tell this state apart from a view build
# that simply hasn't completed (besides looking at the logs,
# which we don't). This means, unfortunately, that a failure
# of this test is slow - it needs to wait for a timeout.
start_time = time.time()
while time.time() < start_time + 30:
if list(cql.execute(f'SELECT * from {mv}')) == [(2, 1)]:
break
assert list(cql.execute(f'SELECT * from {mv}')) == [(2, 1)]
# The previous test (test_mv_empty_string_partition_key) verifies that a
# row with an empty-string partition key can appear in the view. This was
# checked with a full-table scan. This test is about reading this one
# view partition individually, with WHERE v=''.
# Surprisingly, Cassandra does NOT allow to SELECT this specific row
# individually - "WHERE v=''" is not allowed when v is the partition key
# (even of a view). We consider this to be a Cassandra bug - it doesn't
# make sense to allow the user to add a row and to see it in a full-table
# scan, but not to query it individually. This is why we mark this test as
# a Cassandra bug and want Scylla to pass it.
# Reproduces issue #9375 and #9352.
def test_mv_empty_string_partition_key_individual(cassandra_bug, cql, test_keyspace):
schema = 'p int, v text, primary key (p)'
with new_test_table(cql, test_keyspace, schema) as table:
with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
# Insert a bunch of (p,v) rows. One of the v's is the empty
# string, which we would like to test, but let's insert more
# rows to make it more likely to exercise various possibilities
# of token ordering (see #9352).
rows = [[123, ''], [1, 'dog'], [2, 'cat'], [700, 'hello'], [3, 'horse']]
for row in rows:
cql.execute(f"INSERT INTO {table} (p,v) VALUES ({row[0]}, '{row[1]}')")
# Note that because cql-pytest runs on a single node, view
# updates are synchronous, and we can read the view immediately
# without retrying. In a general setup, this test would require
# retries.
# Check that we can read the individual partition with the
# empty-string key:
assert list(cql.execute(f"SELECT * FROM {mv} WHERE v=''")) == [('', 123)]
# The SELECT above works from cache. However, empty partition
# keys also used to be special-cased and be buggy when reading
# and writing sstables, so let's verify that the empty partition
# key can actually be written and read from disk, by forcing a
# memtable flush and bypassing the cache on read.
# In the past Scylla used to fail this flush because the sstable
# layer refused to write empty partition keys to the sstable:
nodetool.flush(cql, mv)
# First try a full-table scan, and then try to read the
# individual partition with the empty key:
assert set(cql.execute(f"SELECT * FROM {mv} BYPASS CACHE")) == {
(x[1], x[0]) for x in rows}
# Issue #9352 used to prevent us finding WHERE v='' here, even
# when the data is known to exist (the above full-table scan
# saw it!) and despite the fact that WHERE v='' is parsed
# correctly because we tested above it works from memtables.
assert list(cql.execute(f"SELECT * FROM {mv} WHERE v='' BYPASS CACHE")) == [('', 123)]
# Test that the "IS NOT NULL" clause in the materialized view's SELECT
# functions as expected - namely, rows which have their would-be view
# key column unset (aka null) do not get copied into the view.
def test_mv_is_not_null(cql, test_keyspace):
schema = 'p int, v text, primary key (p)'
with new_test_table(cql, test_keyspace, schema) as table:
with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
cql.execute(f"INSERT INTO {table} (p,v) VALUES (123, 'dog')")
cql.execute(f"INSERT INTO {table} (p,v) VALUES (17, null)")
# Note that because cql-pytest runs on a single node, view
# updates are synchronous, and we can read the view immediately
# without retrying. In a general setup, this test would require
# retries.
# The row with 123 should appear in the view, but the row with
# 17 should not, because v *is* null.
assert list(cql.execute(f"SELECT * FROM {mv}")) == [('dog', 123)]
# The view row should disappear and reappear if its key is
# changed to null and back in the base table:
cql.execute(f"UPDATE {table} SET v=null WHERE p=123")
assert list(cql.execute(f"SELECT * FROM {mv}")) == []
cql.execute(f"UPDATE {table} SET v='cat' WHERE p=123")
assert list(cql.execute(f"SELECT * FROM {mv}")) == [('cat', 123)]
cql.execute(f"DELETE v FROM {table} WHERE p=123")
assert list(cql.execute(f"SELECT * FROM {mv}")) == []
# Refs #10851. The code used to create a wildcard selection for all columns,
# which erroneously also includes static columns if such are present in the
# base table. Currently views only operate on regular columns and the filtering
# code assumes that. Once we implement static column support for materialized
# views, this test case will be a nice regression test to ensure that everything still
# works if the static columns are *not* used in the view.
# This test goes over all combinations of filters for partition, clustering and regular
# base columns.
def test_filter_with_unused_static_column(cql, test_keyspace, scylla_only):
schema = 'p int, c int, v int, s int static, primary key (p,c)'
with new_test_table(cql, test_keyspace, schema) as table:
for p_condition in ['p = 42', 'p IS NOT NULL']:
for c_condition in ['c = 43', 'c IS NOT NULL']:
for v_condition in ['v = 44', 'v IS NOT NULL']:
where = f"{p_condition} AND {c_condition} AND {v_condition}"
with new_materialized_view(cql, table, select='p,c,v', pk='p,c,v', where=where) as mv:
cql.execute(f"INSERT INTO {table} (p,c,v) VALUES (42,43,44)")
cql.execute(f"INSERT INTO {table} (p,c,v) VALUES (1,2,3)")
expected = [(42,43,44)] if '4' in where else [(42,43,44),(1,2,3)]
assert list(cql.execute(f"SELECT * FROM {mv}")) == expected
# IS_NOT operator can only be used in the context of materialized view creation and it must be of the form IS NOT NULL.
# Trying to do something like IS NOT 42 should fail.
# The error is a SyntaxException because Scylla and Cassandra check this during parsing.
def test_is_not_operator_must_be_null(cql, test_keyspace):
schema = 'p int PRIMARY KEY'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
try:
with pytest.raises(SyntaxException, match="NULL"):
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p IS NOT 42 PRIMARY KEY (p)")
finally:
cql.execute(f"DROP MATERIALIZED VIEW IF EXISTS {test_keyspace}.{mv}")
# Test that a view can be altered with synchronous_updates property and that
# the synchronous updates code path is then reached for such view.
def test_mv_synchronous_updates(cql, test_keyspace):
schema = 'p int, v text, primary key (p)'
with new_test_table(cql, test_keyspace, schema) as table:
with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as sync_mv, \
new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as async_mv, \
new_materialized_view(cql, table, '*', 'v,p', 'v is not null and p is not null', extra='with synchronous_updates = true') as sync_mv_from_the_start, \
new_materialized_view(cql, table, '*', 'v,p', 'v is not null and p is not null', extra='with synchronous_updates = true') as async_mv_altered:
# Make one view synchronous
cql.execute(f"ALTER MATERIALIZED VIEW {sync_mv} WITH synchronous_updates = true")
# Make another one asynchronous
cql.execute(f"ALTER MATERIALIZED VIEW {async_mv_altered} WITH synchronous_updates = false")
# Execute a query and inspect its tracing info
res = cql.execute(f"INSERT INTO {table} (p,v) VALUES (123, 'dog')", trace=True)
trace = res.get_query_trace()
wanted_trace1 = f"Forcing {sync_mv} view update to be synchronous"
wanted_trace2 = f"Forcing {sync_mv_from_the_start} view update to be synchronous"
unwanted_trace1 = f"Forcing {async_mv} view update to be synchronous"
unwanted_trace2 = f"Forcing {async_mv_altered} view update to be synchronous"
wanted_traces_were_found = [False, False]
for event in trace.events:
assert unwanted_trace1 not in event.description
assert unwanted_trace2 not in event.description
if wanted_trace1 in event.description:
wanted_traces_were_found[0] = True
if wanted_trace2 in event.description:
wanted_traces_were_found[1] = True
assert all(wanted_traces_were_found)