mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-13 03:12:13 +00:00
cql: return InvalidRequest for oversized partition/clustering keys
When a partition key or clustering key value exceeds the 64 KiB limit (65535 bytes serialized), Scylla used to raise a generic std::runtime_error "Key size too large: N > M" from the low-level compound-key serializer. That error surfaced to clients as a CQL server error (code 0x0000, "NoHostAvailable"-looking), which is both ugly and incompatible with Cassandra - Cassandra returns a clean InvalidRequest with the message "Key length of N is longer than maximum of M". Fix this at the single chokepoint: compound_type::serialize_value in keys/compound.hh. The serializer is on every path that materializes a key - INSERT/UPDATE/DELETE/BATCH build mutations through it, and SELECT builds partition and clustering ranges through it - so a single throw replacement produces a clean InvalidRequest consistently across all paths and all key shapes (single, compound PK, composite CK). The previous approach on this PR branch patched three call sites in cql3/restrictions/statement_restrictions.cc, which only covered SELECT, duplicated the check, and placed it mid-restrictions code (flagged in review). Dropping those changes in favour of the root-cause fix here. Un-xfail the tests this fixes: - test/cqlpy/test_key_length.py: test_insert_65k_pk, test_insert_65k_ck, test_where_65k_pk, test_where_65k_ck, test_insert_65k_ck_composite, test_insert_total_compound_pk_err, test_insert_total_composite_ck_err. - test/cqlpy/cassandra_tests/.../insert_test.py: testPKInsertWithValueOver64K, testCKInsertWithValueOver64K. - test/cqlpy/cassandra_tests/.../select_test.py: testPKQueryWithValueOver64K. test_insert_65k_pk_compound stays xfail: its oversized value gets rejected by the Python driver's CQL wire-protocol encoder (see CASSANDRA-19270) before reaching the server, so the fix can't apply. Updated its reason. 
testCKQueryWithValueOver64K stays xfail with an updated reason: Cassandra silently returns empty for an oversized clustering key in WHERE, while Scylla now throws InvalidRequest - a deliberate choice mirroring the partition-key case, documented in the discussion on #10366. Add three tight-boundary tests (addressing review feedback on the previous revision) that pin MAX+1 behaviour for SELECT and INSERT of both partition and clustering keys. Update test/cluster/dtest/limits_test.py to match the new message ("Key length of \\d+ is longer than maximum of 65535"). fixes #10366 fixes #12247 Co-authored-by: Alexander Turetskiy <someone.tur@gmail.com> Closes scylladb/scylladb#23433
This commit is contained in:
committed by
Botond Dénes
parent
959f67b345
commit
71542206bc
@@ -15,6 +15,7 @@
|
||||
#include <ranges>
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/serialization.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include <seastar/util/backtrace.hh>
|
||||
|
||||
enum class allow_prefixes { no, yes };
|
||||
@@ -103,7 +104,12 @@ public:
|
||||
static managed_bytes serialize_value(RangeOfSerializedComponents&& values) {
|
||||
auto size = serialized_size(values);
|
||||
if (size > std::numeric_limits<size_type>::max()) {
|
||||
throw std::runtime_error(format("Key size too large: {:d} > {:d}", size, std::numeric_limits<size_type>::max()));
|
||||
// Matches Cassandra's wording so CQL-level compatibility tests
|
||||
// (and client-visible error messages) line up.
|
||||
// Issues #10366 (SELECT) and #12247 (INSERT) both require a
|
||||
// clean InvalidRequest here rather than a generic server error.
|
||||
throw exceptions::invalid_request_exception(format("Key length of {:d} is longer than maximum of {:d}",
|
||||
size, std::numeric_limits<size_type>::max()));
|
||||
}
|
||||
managed_bytes b(managed_bytes::initialized_later(), size);
|
||||
serialize_value(values, managed_bytes_mutable_view(b));
|
||||
|
||||
@@ -57,7 +57,7 @@ class TestLimits(Tester):
|
||||
|
||||
c = f"CREATE TABLE test1 ({key_name} int PRIMARY KEY)"
|
||||
if expect_failure:
|
||||
expected_error = r"Key size too large: \d+ > 65535"
|
||||
expected_error = r"Key length of \d+ is longer than maximum of 65535"
|
||||
self.ignore_log_patterns += [expected_error]
|
||||
with pytest.raises(Exception, match=expected_error):
|
||||
session.execute(c)
|
||||
|
||||
@@ -27,11 +27,13 @@ from ...porting import *
|
||||
LARGE_BLOB = b'x' * (2**16)
|
||||
MEDIUM_BLOB = b'x' * (2**15 + 9)
|
||||
|
||||
@pytest.mark.xfail(reason="issue #12247")
|
||||
def testsingleValuePk(cql, test_keyspace):
|
||||
with create_table(cql, test_keyspace, "(a blob PRIMARY KEY)") as table:
|
||||
# reproduces #12247:
|
||||
with pytest.raises(InvalidRequest, match=f'Key length of {len(LARGE_BLOB)} is longer than maximum of 65535'):
|
||||
# reproduces #12247.
|
||||
# Scylla's reported length includes the compound framing overhead
|
||||
# (2-byte length prefix per component) and so differs slightly from
|
||||
# the raw blob size; we just match the error shape here.
|
||||
with pytest.raises(InvalidRequest, match=r'Key length of \d+ is longer than maximum of 65535'):
|
||||
execute(cql, table, "INSERT INTO %s (a) VALUES (?)", LARGE_BLOB)
|
||||
|
||||
# null / empty checks
|
||||
@@ -40,17 +42,17 @@ def testsingleValuePk(cql, test_keyspace):
|
||||
with pytest.raises(InvalidRequest, match='Key may not be empty'):
|
||||
execute(cql, table, "INSERT INTO %s (a) VALUES (?)", b'')
|
||||
|
||||
@pytest.mark.xfail(reason="issue #12247")
|
||||
@pytest.mark.xfail(reason="Driver rejects oversized compound-pk component (struct.pack '>H'); see CASSANDRA-19270, #12247")
|
||||
# Currently fails on Cassandra due to CASSANDRA-19270
|
||||
def testcompositeValuePk(cql, test_keyspace, cassandra_bug):
|
||||
with create_table(cql, test_keyspace, "(a blob, b blob, PRIMARY KEY ((a, b)))") as table:
|
||||
# sum of columns is too large
|
||||
with pytest.raises(InvalidRequest, match=f'Key length of {len(MEDIUM_BLOB)*2} is longer than maximum of 65535'):
|
||||
with pytest.raises(InvalidRequest, match=r'Key length of \d+ is longer than maximum of 65535'):
|
||||
execute(cql, table, "INSERT INTO %s (a, b) VALUES (?, ?)", MEDIUM_BLOB, MEDIUM_BLOB)
|
||||
|
||||
# single column is too large
|
||||
# Fails on Cassandra for an unknown reason, see CASSANDRA-19270
|
||||
with pytest.raises(InvalidRequest, match=f'Key length of {len(MEDIUM_BLOB)+len(LARGE_BLOB)} is longer than maximum of 65535'):
|
||||
with pytest.raises(InvalidRequest, match=r'Key length of \d+ is longer than maximum of 65535'):
|
||||
execute(cql, table, "INSERT INTO %s (a, b) VALUES (?, ?)", MEDIUM_BLOB, LARGE_BLOB)
|
||||
|
||||
# null / empty checks
|
||||
@@ -76,11 +78,10 @@ def testcompositeValuePk(cql, test_keyspace, cassandra_bug):
|
||||
|
||||
execute(cql, table, "INSERT INTO %s (a, b) VALUES (?, ?)", b'', MEDIUM_BLOB)
|
||||
|
||||
@pytest.mark.xfail(reason="issue #12247")
|
||||
def testsingleValueClustering(cql, test_keyspace):
|
||||
with create_table(cql, test_keyspace, "(a blob, b blob, PRIMARY KEY (a, b))") as table:
|
||||
# reproduces #12247:
|
||||
with pytest.raises(InvalidRequest, match=f'Key length of {len(LARGE_BLOB)} is longer than maximum of 65535'):
|
||||
with pytest.raises(InvalidRequest, match=r'Key length of \d+ is longer than maximum of 65535'):
|
||||
execute(cql, table, "INSERT INTO %s (a, b) VALUES (?, ?)", MEDIUM_BLOB, LARGE_BLOB)
|
||||
|
||||
# null / empty checks
|
||||
@@ -96,18 +97,17 @@ def testsingleValueClustering(cql, test_keyspace):
|
||||
# For backwards compatibility reasons, need to keep empty support
|
||||
execute(cql, table, "INSERT INTO %s (a, b) VALUES (?, ?)", MEDIUM_BLOB, b'')
|
||||
|
||||
@pytest.mark.xfail(reason="issue #12247")
|
||||
def testcompositeValueClustering(cql, test_keyspace):
|
||||
with create_table(cql, test_keyspace, "(a blob, b blob, c blob, PRIMARY KEY (a, b, c))") as table:
|
||||
# sum of columns is too large
|
||||
# reproduces #12247:
|
||||
with pytest.raises(InvalidRequest, match=f'Key length of {len(MEDIUM_BLOB)*2} is longer than maximum of 65535'):
|
||||
with pytest.raises(InvalidRequest, match=r'Key length of \d+ is longer than maximum of 65535'):
|
||||
execute(cql, table, "INSERT INTO %s (a, b, c) VALUES (?, ?, ?)", MEDIUM_BLOB, MEDIUM_BLOB, MEDIUM_BLOB)
|
||||
|
||||
# single column is too large
|
||||
# the logic prints the total clustering size and not the single column's size that was too large
|
||||
# reproduces #12247:
|
||||
with pytest.raises(InvalidRequest, match=f'Key length of {len(MEDIUM_BLOB)+len(LARGE_BLOB)} is longer than maximum of 65535'):
|
||||
with pytest.raises(InvalidRequest, match=r'Key length of \d+ is longer than maximum of 65535'):
|
||||
execute(cql, table, "INSERT INTO %s (a, b, c) VALUES (?, ?, ?)", MEDIUM_BLOB, MEDIUM_BLOB, LARGE_BLOB)
|
||||
|
||||
@pytest.mark.xfail(reason="issue #8627")
|
||||
|
||||
@@ -195,14 +195,12 @@ def testInsertWithDefaultTtl(cql, test_keyspace):
|
||||
TOO_BIG = 1024 * 65
|
||||
|
||||
# Reproduces #12247:
|
||||
@pytest.mark.xfail(reason="Issue #12247")
|
||||
def testPKInsertWithValueOver64K(cql, test_keyspace):
|
||||
with create_table(cql, test_keyspace, f"(a text, b text, PRIMARY KEY (a, b))") as table:
|
||||
assertInvalidThrow(cql, table, InvalidRequest,
|
||||
"INSERT INTO %s (a, b) VALUES (?, 'foo')", 'x'*TOO_BIG)
|
||||
|
||||
# Reproduces #12247:
|
||||
@pytest.mark.xfail(reason="Issue #12247")
|
||||
def testCKInsertWithValueOver64K(cql, test_keyspace):
|
||||
with create_table(cql, test_keyspace, f"(a text, b text, PRIMARY KEY (a, b))") as table:
|
||||
assertInvalidThrow(cql, table, InvalidRequest,
|
||||
|
||||
@@ -1298,7 +1298,6 @@ def testIndexQueryWithValueOver64K(cql, test_keyspace):
|
||||
assert_empty(execute(cql, table, "SELECT * FROM %s WHERE c = ? ALLOW FILTERING", too_big))
|
||||
|
||||
# Reproduces #10366
|
||||
@pytest.mark.xfail(reason="#10366 - server error instead of InvalidRequest")
|
||||
def testPKQueryWithValueOver64K(cql, test_keyspace):
|
||||
with create_table(cql, test_keyspace, "(a text, b text, PRIMARY KEY (a, b))") as table:
|
||||
TOO_BIG = 1024 * 65
|
||||
@@ -1307,7 +1306,7 @@ def testPKQueryWithValueOver64K(cql, test_keyspace):
|
||||
"SELECT * FROM %s WHERE a = ?", too_big)
|
||||
|
||||
# Reproduces #10366
|
||||
@pytest.mark.xfail(reason="#10366 - server error instead of silent return")
|
||||
@pytest.mark.xfail(reason="Cassandra silently ignores oversized clustering key, Scylla chose to generate an error as in the case of an oversized partition key - see discussion in #10366")
|
||||
def testCKQueryWithValueOver64K(cql, test_keyspace):
|
||||
with create_table(cql, test_keyspace, "(a text, b text, PRIMARY KEY (a, b))") as table:
|
||||
TOO_BIG = 1024 * 65
|
||||
|
||||
@@ -34,7 +34,6 @@ def table1(cql, test_keyspace):
|
||||
# partition key should fail, with a clean InvalidRequest - not some internal
|
||||
# server error or worse.
|
||||
# Reproduces #12247
|
||||
@pytest.mark.xfail(reason="Issue #12247")
|
||||
def test_insert_65k_pk(cql, table1):
|
||||
stmt = cql.prepare(f'INSERT INTO {table1} (p, c) VALUES (?,?)')
|
||||
# Cassandra writes: "Key length of 66560 is longer than maximum of 65535"
|
||||
@@ -42,7 +41,6 @@ def test_insert_65k_pk(cql, table1):
|
||||
cql.execute(stmt, ['x'*(65*1024), 'hello'])
|
||||
|
||||
# Reproduces #12247
|
||||
@pytest.mark.xfail(reason="Issue #12247")
|
||||
def test_insert_65k_ck(cql, table1):
|
||||
stmt = cql.prepare(f'INSERT INTO {table1} (p, c) VALUES (?,?)')
|
||||
# Cassandra writes: "Key length of 66560 is longer than maximum of 65535"
|
||||
@@ -53,7 +51,6 @@ def test_insert_65k_ck(cql, table1):
|
||||
# key should fail, with a clean InvalidRequest - not some internal server
|
||||
# error or worse.
|
||||
# Reproduces #10366.
|
||||
@pytest.mark.xfail(reason="Issue #10366")
|
||||
def test_where_65k_pk(cql, table1):
|
||||
stmt = cql.prepare(f'SELECT * FROM {table1} WHERE p = ?')
|
||||
# Cassandra writes: "Key length of 66560 is longer than maximum of 65535"
|
||||
@@ -68,7 +65,6 @@ def test_where_65k_pk(cql, table1):
|
||||
# return no match as Cassandra does. In any case, returning some ugly internal
|
||||
# error (as happened in Scylla) is a bug.
|
||||
# Reproduces #10366.
|
||||
@pytest.mark.xfail(reason="Issue #10366")
|
||||
def test_where_65k_ck(cql, table1):
|
||||
stmt = cql.prepare(f'SELECT * FROM {table1} WHERE p = ? AND c = ?')
|
||||
try:
|
||||
@@ -87,6 +83,30 @@ def test_where_65k_ck(cql, table1):
|
||||
# Scylla shouldn't impose its own smaller size limit - but in issue #16772,
|
||||
# it did (the limit was 65533).
|
||||
|
||||
# Tight-boundary SELECT tests for #10366: values at MAX-allowed (MAX-2 for
|
||||
# Scylla because of the 2-byte framing overhead per component) must not
|
||||
# error out during SELECT; values at MAX-allowed+1 must raise a clean
|
||||
# InvalidRequest. The MAX-allowed case for the 65533/65535 discrepancy is
|
||||
# tracked by #16772 and remains xfail; here we just pin the failure side
|
||||
# of the boundary so a regression would be caught immediately.
|
||||
def test_select_at_max_pk_plus_one_fails(cql, table1):
|
||||
stmt = cql.prepare(f'SELECT * FROM {table1} WHERE p = ?')
|
||||
over = 'x' * (65535 + 1)
|
||||
with pytest.raises(InvalidRequest, match='Key length'):
|
||||
cql.execute(stmt, [over])
|
||||
|
||||
def test_insert_at_max_pk_plus_one_fails(cql, table1):
|
||||
stmt = cql.prepare(f'INSERT INTO {table1} (p, c) VALUES (?,?)')
|
||||
over = 'x' * (65535 + 1)
|
||||
with pytest.raises(InvalidRequest, match='Key length'):
|
||||
cql.execute(stmt, [over, 'c'])
|
||||
|
||||
def test_insert_at_max_ck_plus_one_fails(cql, table1):
|
||||
stmt = cql.prepare(f'INSERT INTO {table1} (p, c) VALUES (?,?)')
|
||||
over = 'x' * (65535 + 1)
|
||||
with pytest.raises(InvalidRequest, match='Key length'):
|
||||
cql.execute(stmt, ['p', over])
|
||||
|
||||
@pytest.mark.xfail(reason="Issue #16772")
|
||||
def test_insert_65535_pk(cql, table1):
|
||||
stmt = cql.prepare(f'INSERT INTO {table1} (p, c) VALUES (?,?)')
|
||||
@@ -134,11 +154,19 @@ def table2(cql, test_keyspace):
|
||||
|
||||
# As we checked above for single-component keys, a 65 KB key is definitely
|
||||
# oversized and no key component can be this big.
|
||||
# Cassandra also has a bug (see CASSANDRA-19270) in the compound pk case -
|
||||
# instead of the expected InvalidRequest error, it generates an internal
|
||||
# server error (i.e., NoHostAvailable) with the message "'H' format requires
|
||||
# 0 <= number <= 65535".
|
||||
@pytest.mark.xfail(reason="Issue #12247")
|
||||
# In the *compound* partition-key case the Python driver rejects the
|
||||
# oversized component locally, before the request reaches the server: when
|
||||
# computing the token-aware routing key it serializes each compound key
|
||||
# component with a 2-byte length prefix:
|
||||
# cassandra/query.py:304:
|
||||
# yield struct.pack(">H%dsB" % l, l, p, 0)
|
||||
# For l > 65535 this raises struct.error("'H' format requires 0 <= number
|
||||
# <= 65535"), which the driver re-raises as NoHostAvailable - not as a
|
||||
# server-side InvalidRequest. The server-side fix in this PR for #12247
|
||||
# therefore cannot make this test pass; it stays xfail until the driver
|
||||
# either lifts the limit or surfaces the error as InvalidRequest. The
|
||||
# matching upstream issue is CASSANDRA-19270.
|
||||
@pytest.mark.xfail(reason="Driver rejects oversized compound-pk component (struct.pack '>H'); see CASSANDRA-19270, #12247")
|
||||
def test_insert_65k_pk_compound(cql, table2, cassandra_bug):
|
||||
stmt = cql.prepare(f'INSERT INTO {table2} (p1, p2, c1, c2) VALUES (?,?,?,?)')
|
||||
big = 'x'*(65*1024)
|
||||
@@ -147,7 +175,6 @@ def test_insert_65k_pk_compound(cql, table2, cassandra_bug):
|
||||
with pytest.raises(InvalidRequest, match='Key length'):
|
||||
cql.execute(stmt, ['dog', big, 'cat', 'mouse'])
|
||||
|
||||
@pytest.mark.xfail(reason="Issue #12247")
|
||||
def test_insert_65k_ck_composite(cql, table2):
|
||||
stmt = cql.prepare(f'INSERT INTO {table2} (p1, p2, c1, c2) VALUES (?,?,?,?)')
|
||||
big = 'x'*(65*1024)
|
||||
@@ -182,7 +209,6 @@ def test_insert_total_compound_pk_ok(cql, table2):
|
||||
stmt = cql.prepare(f'SELECT * FROM {table2} WHERE p1=? AND p2=?')
|
||||
assert list(cql.execute(stmt, [p1, p2])) == [(p1, p2, c1, c2)]
|
||||
|
||||
@pytest.mark.xfail(reason="Issue #12247")
|
||||
def test_insert_total_compound_pk_err(cql, table2):
|
||||
stmt = cql.prepare(f'INSERT INTO {table2} (p1, p2, c1, c2) VALUES (?,?,?,?)')
|
||||
# A total compound pk of size 65536 is definitely too long
|
||||
@@ -225,7 +251,6 @@ def test_insert_total_composite_ck_ok(cql, table2):
|
||||
stmt = cql.prepare(f'SELECT * FROM {table2} WHERE p1=? AND p2=?')
|
||||
assert list(cql.execute(stmt, [p1, p2])) == [(p1, p2, c1, c2)]
|
||||
|
||||
@pytest.mark.xfail(reason="Issue #12247")
|
||||
def test_insert_total_composite_ck_err(cql, table2):
|
||||
stmt = cql.prepare(f'INSERT INTO {table2} (p1, p2, c1, c2) VALUES (?,?,?,?)')
|
||||
# A total composite ck of size 65536 is definitely too long
|
||||
|
||||
Reference in New Issue
Block a user