From c46ae2c2ab50ca830afeb8beee3d9783eb30fde0 Mon Sep 17 00:00:00 2001 From: Andrzej Jackowski Date: Thu, 12 Feb 2026 14:47:52 +0100 Subject: [PATCH] test: explicitly set compression algorithm in test_autoretrain_dict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `test_autoretrain_dict` was originally written, the default `sstable_compression_user_table_options` was `LZ4Compressor`. The test assumed (correctly) that the initial compression did not use a trained dictionary; later in the test scenario, it switched the algorithm to one with a dictionary. However, the default `sstable_compression_user_table_options` is now `LZ4WithDictsCompressor`, so the old assumption is no longer correct. As a result, the assertion that data is initially not compressed well may or may not fail depending on dictionary training timing. To fix this, this commit explicitly sets `ZstdCompressor` as the initial `sstable_compression_user_table_options`, which guarantees that the initial compression never uses a dictionary. Note: `ZstdCompressor` differs from the former default `LZ4Compressor`. However, it's a better choice — the test aims to show the benefit of using a dictionary, not the benefit of Zstd over LZ4 (and the test uses `ZstdWithDictsCompressor` as the algorithm with the dictionary). 
Fixes: scylladb/scylladb#28204 (cherry picked from commit 9ffa62a986815709d0a09c705d2d0caf64776249) --- .../test_sstable_compression_dictionaries_autotrain.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/cluster/test_sstable_compression_dictionaries_autotrain.py b/test/cluster/test_sstable_compression_dictionaries_autotrain.py index 1ff0fecd06..86f4ac248b 100644 --- a/test/cluster/test_sstable_compression_dictionaries_autotrain.py +++ b/test/cluster/test_sstable_compression_dictionaries_autotrain.py @@ -53,6 +53,9 @@ async def test_autoretrain_dict(manager: ManagerClient): n_blobs = 1024 uncompressed_size = blob_size * n_blobs * rf + # Start with compressor without a dictionary + cfg = { "sstable_compression_user_table_options": "ZstdCompressor" } + logger.info("Bootstrapping cluster") servers = await manager.servers_add(2, cmdline=[ '--logger-log-level=storage_service=debug', @@ -61,7 +64,7 @@ async def test_autoretrain_dict(manager: ManagerClient): '--sstable-compression-dictionaries-retrain-period-in-seconds=1', '--sstable-compression-dictionaries-autotrainer-tick-period-in-seconds=1', f'--sstable-compression-dictionaries-min-training-dataset-bytes={int(uncompressed_size/2)}', - ], auto_rack_dc="dc1") + ], auto_rack_dc="dc1", config=cfg) logger.info("Creating table") cql = manager.get_cql()