Files
scylladb/test/cqlpy/test_sstable_compression.py
Nikos Dragazis 1e37781d86 schema: Add initializer for compression defaults
In PR 5b6570be52 we introduced the config option
`sstable_compression_user_table_options` to allow adjusting the default
compression settings for user tables. However, the new option was hooked
into the CQL layer and applied only to CQL base tables, not to the whole
spectrum of user tables: CQL auxiliary tables (materialized views,
secondary indexes, CDC log tables), Alternator base tables, Alternator
auxiliary tables (GSIs, LSIs, Streams).

Fix this by moving the logic into the `schema_builder` via a schema
initializer. This ensures that the default compression settings are
applied uniformly regardless of how the table is created, while also
keeping the logic in a central place.

Register the initializer at startup in all executables where schemas are
being used (`scylla_main()`, `scylla_sstable_main()`, `cql_test_env`).

Finally, remove the ad-hoc logic from `create_table_statement`
(redundant as of this patch), remove the xfail markers from the relevant
tests and adjust `test_describe_cdc_log_table_create_statement` to
expect LZ4WithDicts as the default compressor.

Fixes #26914.

Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
2026-01-13 20:45:59 +02:00

137 lines
7.9 KiB
Python

# -*- coding: utf-8 -*-
# Copyright 2023-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#############################################################################
# Tests for configuration of compressed sstables
#############################################################################
import pytest
from . import nodetool
from .util import new_test_table, new_materialized_view, is_scylla
from cassandra.protocol import ConfigurationException, SyntaxException
# In older Cassandra and Scylla, the name of the compression algorithm was
# given as a "sstable_compression" attribute, but newer Cassandra switched
# to "class". Check that we support this new name class.
# Reproduces #8948.
@pytest.mark.xfail(reason="#8948")
def test_compression_class(cql, test_keyspace):
with new_test_table(cql, test_keyspace, "p int primary key, v int", "with compression = { 'class': 'LZ4Compressor' }") as table:
pass
# In the following tests, we use the older "sstable_compression" option name
# (instead of the new "class") so we can have passing tests despite #8948.
# When both Scylla and Cassandra support "class", we should modify this variable
# to use it:
sstable_compression = 'sstable_compression'
@pytest.fixture(scope="module")
def table_lz4(cql, test_keyspace):
with new_test_table(cql, test_keyspace, "p int primary key, v int", "with compression = { '" + sstable_compression + "': 'LZ4Compressor' }") as table:
yield table
# Test that if we have a table with lz4 compression, it has the expected
# compression "class" in its schema table. Note that even if the older
# "sstable_compression" attribute was used to set the compression class,
# when reading the schema we should see "class".
# Reproduces #8948.
@pytest.mark.xfail(reason="#8948")
def test_read_compression_class(cql, table_lz4):
[ks, cf] = table_lz4.split('.')
opts = cql.execute(f"SELECT compression FROM system_schema.tables WHERE keyspace_name='{ks}' AND table_name='{cf}'").one().compression
assert 'class' in opts
assert opts['class'] == 'org.apache.cassandra.io.compress.LZ4Compressor'
# When creating a compressed table without specifying chunk_length_in_kb
# explicitly, some default value is nevertheless used, and its value should
# be readable from the schema.
# Reproduces #6442.
@pytest.mark.xfail(reason="#6442")
def test_read_chunk_length(cql, table_lz4):
[ks, cf] = table_lz4.split('.')
opts = cql.execute(f"SELECT compression FROM system_schema.tables WHERE keyspace_name='{ks}' AND table_name='{cf}'").one().compression
assert 'chunk_length_in_kb' in opts
# Both Cassandra and Scylla only allow chunk_length_in_kb to be set a power
# of two.
def test_chunk_length_must_be_power_of_two(cql, test_keyspace):
with pytest.raises(ConfigurationException, match='power of 2'):
with new_test_table(cql, test_keyspace, "p int primary key, v int", "with compression = { '" + sstable_compression + "': 'LZ4Compressor', 'chunk_length_in_kb': 100 }") as table:
pass
# chunk_length_in_kb cannot be zero, negative, null, or non-integer.
# Surprisingly, Scylla allows floating-point numbers (and truncates them).
# It shouldn't, and Cassandra doesn't, so this case is "xfail" below.
@pytest.mark.parametrize("garbage", ["0", "-1", "null", "'dog'",
pytest.param("1.1", marks=pytest.mark.xfail(reason='Scylla truncates float chunk length'))])
def test_chunk_length_invalid(cql, test_keyspace, garbage):
# The error should usually be ConfigurationException, but strangely
# Cassandra throws a SyntaxException in the "null" case.
with pytest.raises((ConfigurationException, SyntaxException), match='chunk_length_in_kb'):
with new_test_table(cql, test_keyspace, "p int primary key, v int", "with compression = { '" + sstable_compression + "': 'LZ4Compressor', 'chunk_length_in_kb': " + garbage + " }") as table:
pass
# If a user is allowed to specify a huge number for chunk_length_in_kb, it can
# result in unbounded allocations and potentially crashing Scylla. Therefore,
# there ought to be some limit for the configured chunk length. Let's check it
# by trying a ridiculously large value, which shouldn't be legal.
# This test fails on Cassandra, which doesn't have protection against huge
# chunk sizes, so the test is marked a "cassandra_bug".
# Reproduces #9933.
def test_huge_chunk_length(cql, test_keyspace, cassandra_bug):
with pytest.raises(ConfigurationException, match='chunk_length_in_kb'):
with new_test_table(cql, test_keyspace, "p int primary key, v int", "with compression = { '" + sstable_compression + "': 'LZ4Compressor', 'chunk_length_in_kb': 1048576 }") as table:
# At this point, the test already failed, as we expected the table
# creation to have failed with ConfigurationException. But if we
# reached here, let's really demonstrate the bug - write
# something and flush it, to have sstable compression actually
# be used.
cql.execute(f'INSERT INTO {table} (p, v) VALUES (1, 2)')
nodetool.flush(cql, table)
# Also check the same for ALTER TABLE
with new_test_table(cql, test_keyspace, "p int primary key, v int") as table:
with pytest.raises(ConfigurationException, match='chunk_length_in_kb'):
cql.execute("ALTER TABLE " + table + " with compression = { '" + sstable_compression + "': 'LZ4Compressor', 'chunk_length_in_kb': 1048576 }")
# Check that whatever is currently the default sstable compression algorithm
# (for a long time it was LZ4Compressor but issue #26610 changed it to
# LZ4WithDictsCompressor), the base table and all its auxiliary tables -
# materialized view, secondary index and CDC log table - should use the same
# compression. We don't care if this happens because all these tables use
# the same default - or alternatively because the auxiliary tables "inherit"
# the configuration of the base table (#20388). Both are fine for this test.
#
# If the test runs on Cassandra, only the materialized view part of the test
# is done - the CDC and secondary index implementation on Cassandra is
# different and does not have an ordinary table backing it.
# Reproduces issue #26914
def test_aux_tables_compression_parity(cql, test_keyspace):
extra = "with cdc = {'enabled': true}" if is_scylla(cql) else ""
with new_test_table(cql, test_keyspace, "p int primary key, v int", extra) as table:
with new_materialized_view(cql, table, '*', 'v, p', 'v is not null and p is not null') as mv:
cql.execute(f'CREATE INDEX ON {table}(v)')
# base table's compression setting:
ks, cf = table.split('.')
r = list(cql.execute(f"SELECT compression FROM system_schema.tables WHERE keyspace_name='{ks}' and table_name='{cf}'"))
assert len(r) == 1
base_compression = r[0].compression
# view table's compression setting:
_, view = mv.split('.')
r = list(cql.execute(f"SELECT compression FROM system_schema.views WHERE keyspace_name='{ks}' and view_name='{view}'"))
assert len(r) == 1
view_compression = r[0].compression
if extra: # Scylla-only part
# secondary index's backing view's compression setting:
r = list(cql.execute(f"SELECT compression FROM system_schema.views WHERE keyspace_name='{ks}' and view_name='{cf}_v_idx_index'"))
assert len(r) == 1
index_compression = r[0].compression
assert view_compression == index_compression
# CDC log table's compression setting:
r = list(cql.execute(f"SELECT compression FROM system_schema.tables WHERE keyspace_name='{ks}' and table_name='{cf}_scylla_cdc_log'"))
assert len(r) == 1
cdc_compression = r[0].compression
assert view_compression == cdc_compression
assert base_compression == view_compression