Files
scylladb/test/cqlpy/test_batch.py
copilot-swe-agent[bot] 91dd15d3bf Fix batch size check to only apply to atomic batches with tests
The verify_batch_size() check was applied to all batches before determining mutate_atomic, causing unnecessary failures for batches that never touch system.batchlog. Only logged multi-partition batches (mutate_atomic=true) are stored as blobs in system.batchlog and require size validation.

Changed:

* Moved the verify_batch_size() call in execute_without_conditions() directly
  into the logged multi-partition branch for clearer intent
* The check now runs only on the path where mutate_atomic is true, making
  the code more intuitive

Result:
Batch Type  Single-Partition  Size Check
LOGGED      false             YES
LOGGED      true              NO
UNLOGGED    N/A               NO
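
To make the new placement concrete, the decision now looks roughly like the
following Python-style pseudocode (a simplified sketch, not the actual C++;
is_logged(), spans_multiple_partitions(), mutate_atomically() and mutate() are
illustrative names only):

    # Sketch of where the size check now sits; illustrative names, not real APIs.
    def execute_without_conditions(batch, mutations):
        # A batch is written to system.batchlog only when it is LOGGED
        # and touches more than one partition.
        mutate_atomic = batch.is_logged() and spans_multiple_partitions(mutations)
        if mutate_atomic:
            # Only this path stores the batch as a blob in system.batchlog,
            # so only this path needs verify_batch_size().
            verify_batch_size(mutations)
            return mutate_atomically(mutations)
        return mutate(mutations)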

Tests:

* test_unlogged_batch_size_not_checked: Unlogged batch >1024KB succeeds
* test_logged_multi_partition_batch_size_checked: Logged multi-partition batch >1024KB fails
* test_logged_single_partition_batch_size_not_checked: Logged single-partition batch >1024KB succeeds
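
For reference, the new size-check tests can be selected with pytest's -k
filter against a running node; the exact connection options come from the
cqlpy suite's conftest, so treat this invocation as a sketch only:

    pytest test/cqlpy/test_batch.py -k "batch_size"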

Fixes #27605

Co-authored-by: mykaul <yaniv.kaul@scylladb.com>
2025-12-15 10:45:29 +02:00


# -*- coding: utf-8 -*-
# Copyright 2022-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#############################################################################
# Tests for batch operations
#############################################################################
from cassandra import InvalidRequest
from cassandra.cluster import NoHostAvailable
from .util import new_test_table
from .rest_api import scylla_inject_error
import pytest


@pytest.fixture(scope="module")
def table1(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, "k int primary key, t text") as table:
        yield table


def generate_big_batch(table, size_in_kb):
    statements = [f"INSERT INTO {table} (k, t) VALUES ({idx}, '{'x' * 743}')" for idx in range(size_in_kb)]
    return "BEGIN BATCH\n" + "\n".join(statements) + "\n APPLY BATCH\n"


@pytest.mark.xfail(reason="Scylla does not send warnings for batch: https://github.com/scylladb/scylla/issues/10196")
def test_warnings_are_returned_in_response(cql, table1):
    """Verifies that the response contains warning messages.
    Because the warning threshold differs between Scylla and Cassandra,
    the test tries several sizes until a warning appears.
    """
    for size_in_kb in [10, 129, 256]:
        over_sized_batch = generate_big_batch(table1, size_in_kb)
        response_future = cql.execute_async(over_sized_batch)
        response_future.result()
        if response_future.warnings:
            break
    else:
        pytest.fail('Oversized batch did not generate a warning')
    # example message for Cassandra:
    # 'Batch for [cql_test_1647006065554.cql_test_1647006065623] is of size 7590,
    # exceeding specified threshold of 5120 by 2470.'
    assert "exceeding specified" in response_future.warnings[0]


def test_error_is_raised_for_batch_size_above_threshold(cql, table1):
    """Verifies that CQL returns an error for a batch that exceeds the size given by
    the batch_size_fail_threshold_in_kb setting from scylla.yaml."""
    with pytest.raises(InvalidRequest, match="Batch too large"):
        cql.execute(generate_big_batch(table1, 1025))


# Test checks unexpected errors handling in CQL server.
#
# The original problem was that std::bad_alloc exception occurred while parsing a large batch request.
# This exception was caught by try/catch in cql_server::connection::process_request_one and
# an attempt was made to construct the error response message via make_error function.
# This attempt failed since the error message contained entire query and exceeded the limit of 64K
# in cql_server::response::write_string, causing "Value too large" exception to be thrown.
# This new exception reached the general handler in cql_server::connection::process_request, where
# it was just logged and no information about the problem was sent to the client.
# As a result, the client received a timeout exception after a while and
# no other information about the cause of the error.
#
# It is quite difficult to reproduce OOM in a test, so we use error injection instead.
# Passing injection_key in the body of the request ensures that the exception will be
# thrown only for this test request and will not affect other requests that
# the driver may send in the background.
def test_batch_with_error(cql, table1):
    injection_key = 'query_processor-parse_statement-test_failure'
    with scylla_inject_error(cql, injection_key, one_shot=False):
        # exceptions::exception_code::SERVER_ERROR, it gets converted to NoHostAvailable by the driver
        with pytest.raises(NoHostAvailable, match="Value too large"):
            cql.execute(generate_big_batch(table1, 100) + injection_key)


def test_unlogged_batch_size_not_checked(cql, test_keyspace):
    """Verifies that UNLOGGED batches are NOT subject to batch size limits.
    Unlogged batches are applied as independent mutations and don't go through
    the system.batchlog table, so their collective size is irrelevant.
    This test should succeed even with a batch larger than the fail threshold.
    """
    with new_test_table(cql, test_keyspace, "k int primary key, t text") as table:
        # Create a batch larger than the fail threshold (1024 KB).
        # This would fail for a logged batch, but should succeed for unlogged.
        statements = [f"INSERT INTO {table} (k, t) VALUES ({idx}, '{'x' * 743}')" for idx in range(1100)]
        unlogged_batch = "BEGIN UNLOGGED BATCH\n" + "\n".join(statements) + "\n APPLY BATCH\n"
        # This should not raise an exception
        cql.execute(unlogged_batch)
        # Verify the data was inserted
        result = cql.execute(f"SELECT COUNT(*) FROM {table}")
        assert result.one()[0] == 1100


def test_logged_multi_partition_batch_size_checked(cql, test_keyspace):
    """Verifies that LOGGED batches targeting multiple partitions ARE subject to batch size limits.
    Logged multi-partition batches go through system.batchlog and must be size-checked.
    """
    with new_test_table(cql, test_keyspace, "k int primary key, t text") as table:
        # Create a batch larger than the fail threshold (1024 KB) with multiple partitions
        statements = [f"INSERT INTO {table} (k, t) VALUES ({idx}, '{'x' * 743}')" for idx in range(1100)]
        logged_batch = "BEGIN BATCH\n" + "\n".join(statements) + "\n APPLY BATCH\n"
        # This should raise a "Batch too large" exception
        with pytest.raises(InvalidRequest, match="Batch too large"):
            cql.execute(logged_batch)


def test_logged_single_partition_batch_size_not_checked(cql, test_keyspace):
    """Verifies that LOGGED batches targeting a single partition are NOT subject to batch size limits.
    Logged single-partition batches don't go through system.batchlog,
    so their collective size is not relevant.
    """
    with new_test_table(cql, test_keyspace, "k int, c int, t text, primary key (k, c)") as table:
        # Create a batch larger than the fail threshold (1024 KB) but all targeting the same partition
        statements = [f"INSERT INTO {table} (k, c, t) VALUES (1, {idx}, '{'x' * 743}')" for idx in range(1100)]
        logged_batch = "BEGIN BATCH\n" + "\n".join(statements) + "\n APPLY BATCH\n"
        # This should not raise an exception since it's a single-partition batch
        cql.execute(logged_batch)
        # Verify the data was inserted
        result = cql.execute(f"SELECT COUNT(*) FROM {table} WHERE k = 1")
        assert result.one()[0] == 1100