Files
scylladb/test/cql-pytest/test_materialized_view.py
Nadav Har'El 5d2f694a90 cql3: fix cql3::util::maybe_quote() for keywords
cql3::util::maybe_quote() is a utility function formatting an identifier
name (table name, column name, etc.) that needs to be embedded in a CQL
statement - and might require quoting if it contains non-alphanumeric
characters, uppercase characters, or a CQL keyword.

maybe_quote() made an effort to only quote the identifier name if neccessary,
e.g., a lowercase name usually does not need quoting. But lowercase names
that are CQL keywords - e.g., to or where - cannot be used as identifiers
without quoting. This can cause problems for code that wants to generate
CQL statements, such as the materialized-view problem in issue #9450 - where
a user had a column called "to" and wanted to create a materialized view
for it.

So in this patch we fix maybe_quote() to recognize invalid identifiers by
using the CQL parser, and quote them. This will quote reserved keywords,
but not so-called unreserved keywords, which *are* allowed as identifiers
and don't need quoting. This addition slows down maybe_quote(), but
maybe_quote() is anyway only used in heavy operations which need to
generate CQL.

This patch also adds two tests that reproduce the bug and verify its
fix:

1. Add to the low-level maybe_quote() test (a C++ unit test) also tests
   that maybe_quote() quotes reserved keywords like "to", but doesn't
   quote unreserved keywords like "int".

2. Add a test reproducing issue #9450 - creating a materialized view
   whose key column is a keyword. This new test passes on Cassandra,
   failed on Scylla before this patch, and passes after this patch.

It is worth noting that maybe_quote() now has a "forward compatiblity"
problem: If we save CQL statements generated by maybe_quote(), and a
future version introduces a new reserved keyword, the parser of the
future version may not be able to parse the saved CQL statement that
was generated with the old mayb_quote() and didn't quote what is now
a keyword. This problem can be solved in two ways:

1. Try hard not to introduced new reserved keywords. Instead, introduce
   unreserved keywords. We've been doing this even before recognizing
   this maybe_quote() future-compatibility problem.

2. In the next patch we will introduce quote() - which unconditionally
   quotes identifier names, even if lowercase. These quoted names will
   be uglier for lowercase names - but will be safe from future
   introduction of new keywords. So we can consider switching some or
   all uses of maybe_quote() to quote().

Fixes #9450

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20220118161217.231811-1-nyh@scylladb.com>
2022-02-07 11:33:56 +02:00

122 lines
6.7 KiB
Python

# Copyright 2021-present ScyllaDB
#
# SPDX-License-Identifier: AGPL-3.0-or-later
# Tests for materialized views
import time
import pytest
from util import new_test_table, unique_name, new_materialized_view
from cassandra.protocol import InvalidRequest
# Test that building a view with a large value succeeds. Regression test
# for a bug where values larger than 10MB were rejected during building (#9047)
def test_build_view_with_large_row(cql, test_keyspace):
schema = 'p int, c int, v text, primary key (p,c)'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
big = 'x'*11*1024*1024
cql.execute(f"INSERT INTO {table}(p,c,v) VALUES (1,1,'{big}')")
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p IS NOT NULL AND c IS NOT NULL PRIMARY KEY (c,p)")
try:
retrieved_row = False
for _ in range(50):
res = [row for row in cql.execute(f"SELECT * FROM {test_keyspace}.{mv}")]
if len(res) == 1 and res[0].v == big:
retrieved_row = True
break
else:
time.sleep(0.1)
assert retrieved_row
finally:
cql.execute(f"DROP MATERIALIZED VIEW {test_keyspace}.{mv}")
# Test that updating a view with a large value succeeds. Regression test
# for a bug where values larger than 10MB were rejected during building (#9047)
def test_update_view_with_large_row(cql, test_keyspace):
schema = 'p int, c int, v text, primary key (p,c)'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p IS NOT NULL AND c IS NOT NULL PRIMARY KEY (c,p)")
try:
big = 'x'*11*1024*1024
cql.execute(f"INSERT INTO {table}(p,c,v) VALUES (1,1,'{big}')")
res = [row for row in cql.execute(f"SELECT * FROM {test_keyspace}.{mv}")]
assert len(res) == 1 and res[0].v == big
finally:
cql.execute(f"DROP MATERIALIZED VIEW {test_keyspace}.{mv}")
# Test that a `CREATE MATERIALIZED VIEW` request, that contains bind markers in
# its SELECT statement, fails gracefully with `InvalidRequest` exception and
# doesn't lead to a database crash.
def test_mv_select_stmt_bound_values(cql, test_keyspace):
schema = 'p int PRIMARY KEY'
mv = unique_name()
with new_test_table(cql, test_keyspace, schema) as table:
try:
with pytest.raises(InvalidRequest, match="CREATE MATERIALIZED VIEW"):
cql.execute(f"CREATE MATERIALIZED VIEW {test_keyspace}.{mv} AS SELECT * FROM {table} WHERE p = ? PRIMARY KEY (p)")
finally:
cql.execute(f"DROP MATERIALIZED VIEW IF EXISTS {test_keyspace}.{mv}")
# In test_null.py::test_empty_string_key() we noticed that an empty string
# is not allowed as a partition key. However, an empty string is a valid
# value for a string column, so if we have a materialized view with this
# string column becoming the view's partition key - the empty string may end
# up being the view row's partition key! This case should be supported,
# because the "IS NOT NULL" clause in the view's declaration does not
# eliminate this row (an empty string is not considered NULL).
# This reproduces issue #9375.
@pytest.mark.xfail(reason="issue #9375")
def test_mv_empty_string_partition_key(cql, test_keyspace):
schema = 'p int, v text, primary key (p)'
with new_test_table(cql, test_keyspace, schema) as table:
with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
cql.execute(f"INSERT INTO {table} (p,v) VALUES (123, '')")
# Note that because cql-pytest runs on a single node, view
# updates are synchronous, and we can read the view immediately
# without retrying. In a general setup, this test would require
# retries.
# The view row with the empty partition key should exist.
# In #9375, this failed in Scylla:
assert list(cql.execute(f"SELECT * FROM {mv}")) == [('', 123)]
# However, it is still impossible to select just this row,
# because Cassandra forbids an empty partition key on select
with pytest.raises(InvalidRequest, match='Key may not be empty'):
cql.execute(f"SELECT * FROM {mv} WHERE v=''")
# Reproducer for issue #9450 - when a view's key column name is a (quoted)
# keyword, writes used to fail because they generated internally broken CQL
# with the column name not quoted.
def test_mv_quoted_column_names(cql, test_keyspace):
for colname in ['"dog"', '"Dog"', 'DOG', '"to"', 'int']:
with new_test_table(cql, test_keyspace, f'p int primary key, {colname} int') as table:
with new_materialized_view(cql, table, '*', f'{colname}, p', f'{colname} is not null and p is not null') as mv:
cql.execute(f'INSERT INTO {table} (p, {colname}) values (1, 2)')
# Validate that not only the write didn't fail, it actually
# write the right thing to the view. NOTE: on a single-node
# Scylla, view update is synchronous so we can just read and
# don't need to wait or retry.
assert list(cql.execute(f'SELECT * from {mv}')) == [(2, 1)]
# Same as test_mv_quoted_column_names above (reproducing issue #9450), just
# check *view building* - i.e., pre-existing data in the base table that
# needs to be copied to the view. The view building cannot return an error
# to the user, but can fail to write the desired data into the view.
def test_mv_quoted_column_names_build(cql, test_keyspace):
for colname in ['"dog"', '"Dog"', 'DOG', '"to"', 'int']:
with new_test_table(cql, test_keyspace, f'p int primary key, {colname} int') as table:
cql.execute(f'INSERT INTO {table} (p, {colname}) values (1, 2)')
with new_materialized_view(cql, table, '*', f'{colname}, p', f'{colname} is not null and p is not null') as mv:
# When Scylla's view builder fails as it did in issue #9450,
# there is no way to tell this state apart from a view build
# that simply hasn't completed (besides looking at the logs,
# which we don't). This means, unfortunately, that a failure
# of this test is slow - it needs to wait for a timeout.
start_time = time.time()
while time.time() < start_time + 30:
if list(cql.execute(f'SELECT * from {mv}')) == [(2, 1)]:
break
assert list(cql.execute(f'SELECT * from {mv}')) == [(2, 1)]