Files
scylladb/test/cqlpy/test_compaction.py
Nadav Har'El a56751e71b test/cqlpy: fix test assuming just one tablet
The cqlpy test test_compaction.py::test_compactionstats_after_major_compaction
was written to assume we have just one tablet per shard - if there are many
tablets compaction splitting the data, the test scenario might not need
compaction in the way that the test assumes it does.

Recently (commit 2463e524ed) Scylla's default
was changed to have 10 tablets per shard - not one. This broke this test.
The same commit modified test/cqlpy/suite.yaml, but that affects only test.py
and not test/cqlpy/run, and also not manual runs against a manually-installed
Scylla. If this test absolutely requires a keyspace with 1 and not 10
tablets, then it should create one explicitly. So this is what this test
does (but only if tablets are in use; if vnodes are used that's fine
too).

Before this patch,
  test/cqlpy/run test_compaction.py::test_compactionstats_after_major_compaction
fails. After the patch, it passes.

Fixes #23116

Closes scylladb/scylladb#23121
2025-03-04 10:15:29 +02:00

201 lines
11 KiB
Python

# Copyright 2024-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
import pytest
import requests
from .util import new_materialized_view, new_test_table, unique_name
from . import nodetool
import time
# sleep to let a ttl (of `seconds`) expire and
# the commitlog minimum gc time, in seconds,
# to be greater than the tombstone deletion time
def sleep_till_whole_second(seconds=1):
t = time.time()
time.sleep(seconds - (t - int(t)))
def test_tombstone_gc_with_conflict_in_memtable(scylla_only, cql, test_keyspace):
"""
Regression test for fixed https://github.com/scylladb/scylladb/issues/20423
"""
schema = "k int, v int, primary key (k, v)"
with new_test_table(cql, test_keyspace, schema, extra="with gc_grace_seconds = 0") as table:
with nodetool.no_autocompaction_context(cql, table):
# Insert initial data into the base table:
# Row 1 is expected to be garbage collected after expiration
# This test case tests the ttl case, while `test_tombstone_gc_with_delete_in_memtable`
# test also explicit deletion.
cql.execute(f"insert into {table} (k, v) values (1, 1) using timestamp 1 and ttl 1")
cql.execute(f"insert into {table} (k, v) values (1, 2) using timestamp 2")
nodetool.flush(cql, table)
# Delete row 2, this deletion is expected to be kept
# when we insert backdated live data into the memtable
cql.execute(f"delete from {table} using timestamp 3 where k=1 and v=2")
# Flush all tables explicitly now, since this is skipped in the test on purpose by next major compaction.
nodetool.flush_all(cql)
sleep_till_whole_second()
# Re-insert backdated data into the memtable. It should inhibit tombstone_gc
cql.execute(f"insert into {table} (k, v) values (1, 3) using timestamp 2")
# do not flush before major compaction
nodetool.compact(cql, table, flush_memtables=False)
res = cql.execute(f"select * from mutation_fragments({table})")
sstables = set()
keys = set()
rows = set()
for r in res:
if "sstable" in r.mutation_source:
if r.mutation_fragment_kind == "partition start":
sstables.add(r.mutation_source)
keys.add(r.k)
elif r.mutation_fragment_kind == "clustering row":
rows.add(r.v)
if r.v == 2:
assert "tombstone" in r.metadata
assert len(sstables) == 1, f"Expected single sstable but saw {len(sstables)}: res={list(res)}"
assert keys == {1}, f"Expected keys=={1} but got {keys}: res={list(res)}"
assert rows == {2}, f"Expected rows=={2} but got {rows}: res={list(res)}"
def test_tombstone_gc_with_delete_in_memtable(scylla_only, cql, test_keyspace):
"""
Reproduce https://github.com/scylladb/scylladb/issues/20423
"""
schema = "k int, v int, primary key (k, v)"
with new_test_table(cql, test_keyspace, schema, extra="with gc_grace_seconds = 0") as table:
with nodetool.no_autocompaction_context(cql, table):
# Insert initial data into the base table
# This test case tests the explicit deletion case, while `test_tombstone_gc_with_conflict_in_memtable
# tests the ttl expiration case
cql.execute(f"insert into {table} (k, v) values (1, 1) using timestamp 1 and ttl 1")
cql.execute(f"insert into {table} (k, v) values (1, 2) using timestamp 2")
cql.execute(f"insert into {table} (k, v) values (1, 3) using timestamp 3")
nodetool.flush(cql, table)
cql.execute(f"delete from {table} using timestamp 4 where k=1 and v=2")
# Flush all tables explicitly now, since this is skipped in the test on purpose by next major compaction.
nodetool.flush_all(cql)
sleep_till_whole_second()
# Insert backdated delete into the memtable. It should not inhibit tombstone_gc
cql.execute(f"delete from {table} using timestamp 2 where k=1 and v=3")
# The following insert should not inhibit tombstone_gc since its timestamp is greater than the tombstones
cql.execute(f"insert into {table} (k, v) values (1, 4) using timestamp 5")
# do not flush before major compaction
nodetool.compact(cql, table, flush_memtables=False)
res = cql.execute(f"select * from mutation_fragments({table})")
sstables = set()
keys = set()
rows = set()
for r in res:
if "sstable" in r.mutation_source:
print(r)
if r.mutation_fragment_kind == "partition start":
sstables.add(r.mutation_source)
keys.add(r.k)
elif r.mutation_fragment_kind == "clustering row":
rows.add(r.v)
assert not "tombstone" in r.metadata
assert len(sstables) == 1, f"Expected single sstable but saw {len(sstables)}: res={list(res)}"
assert keys == {1}, f"Expected keys=={1} but got {keys}: res={list(res)}"
assert rows == {3}, f"Expected rows=={3} but got {rows}: res={list(res)}"
def test_tombstone_gc_with_materialized_view_update_in_memtable(scylla_only, cql, test_keyspace):
"""
Reproduce https://github.com/scylladb/scylladb/issues/20424
"""
schema = "k int primary key, v int, w int"
with new_test_table(cql, test_keyspace, schema) as table:
# Create a materialized view with same partition key as the base, and using a regular column in the base as a clustering key in the view
with new_materialized_view(cql, table, '*', 'k, v', 'k is not null and v is not null', extra="with gc_grace_seconds = 0") as mv:
with nodetool.no_autocompaction_context(cql, mv):
# Insert initial data into the base table
cql.execute(f"insert into {table} (k, v, w) values (1, 1, 1)")
# Flush the memtable so the following update won't get compacted in the memtable
nodetool.flush_keyspace(cql, test_keyspace)
# Update the regular column in the base table, causing a view update
# with a shadowable row tombstone for the old value and recent row_marker for the new value
cql.execute(f"insert into {table} (k, v) values (1, 2)")
# Flush all tables explicitly now, since this is skipped in the test on purpose by next major compaction.
nodetool.flush_all(cql)
sleep_till_whole_second()
# Insert new view update into the memtable by updating the regular column in the base table.
# It will generate a view update with a shadowable row tombstone for the previous value
# and the value of a new row with the old value of `w` (with timestamp 1) - that inhibits the purging
# of the shadowable tombstone in the sstable without the fix for #20424
cql.execute(f"insert into {table} (k, v) values (1, 3)")
# do not flush before major compaction
nodetool.compact(cql, mv, flush_memtables=False)
res = cql.execute(f"select * from mutation_fragments({mv})")
sstables = set()
keys = set()
rows = set()
for r in res:
if "sstable" in r.mutation_source:
if r.mutation_fragment_kind == "partition start":
sstables.add(r.mutation_source)
keys.add(r.k)
elif r.mutation_fragment_kind == "clustering row":
rows.add(r.v)
assert "shadowable_tombstone" not in r.metadata
assert len(sstables) == 1, f"Expected single sstable but saw {len(sstables)}: res={list(res)}"
assert keys == {1}, f"Expected keys=={1} but got {keys}: res={list(res)}"
assert rows == {2}, f"Expected rows=={2} but got {keys}: res={list(res)}"
def get_compaction_stats(cql, table):
ks, cf = table.split('.')
res = requests.get(f'{nodetool.rest_api_url(cql)}/compaction_manager/metrics/pending_tasks_by_table')
res.raise_for_status()
stats = res.json()
tasks = 0
for s in stats:
if s['ks'] == ks and s['cf'] == cf:
tasks += int(s['task'])
return tasks, stats
# When using tablets, the test test_compactionstats_after_major_compaction
# assumes there is just one tablet - otherwise if there are many, each may
# have so little data that no compation will be required. So the following
# fixture is a keyspace that ensures it either uses vnodes or when using
# tablets - it uses just one.
@pytest.fixture(scope="module")
def test_keyspace_1(cql, this_dc, has_tablets, test_keyspace):
if has_tablets:
name = unique_name()
cql.execute("CREATE KEYSPACE " + name + " WITH REPLICATION = { 'class' : 'NetworkTopologyStrategy', '" + this_dc + "' : 1 } AND TABLETS = {'enabled': true, 'initial': 1 }")
yield name
else:
# The regular test_keyspace is fine, no need to create another one
yield test_keyspace
cql.execute("DROP KEYSPACE " + name)
@pytest.mark.parametrize("compaction_strategy", ["LeveledCompactionStrategy", "SizeTieredCompactionStrategy", "TimeWindowCompactionStrategy"])
def test_compactionstats_after_major_compaction(scylla_only, cql, test_keyspace_1, compaction_strategy):
"""
Test that compactionstats show no pending compaction after major compaction
"""
num_sstables = 16
extra_strategy_options = ""
if compaction_strategy == "LeveledCompactionStrategy":
extra_strategy_options = ", 'sstable_size_in_mb':1"
num_sstables *= 4 # Need enough data to trigger level 0 compaction
value = 'x' * 128*1024
with new_test_table(cql, test_keyspace_1,
schema="p int PRIMARY KEY, v text",
extra=f"WITH compression={{}} AND compaction={{'class':'{compaction_strategy}'{extra_strategy_options}}}") as table:
with nodetool.no_autocompaction_context(cql, table):
for i in range(num_sstables):
cql.execute(f"INSERT INTO {table} (p, v) VALUES ({i}, '{value}')")
nodetool.flush(cql, table)
tasks, stats = get_compaction_stats(cql, table)
assert tasks > 0, f"Found no pending compaction tasks as expected: stats={stats}"
nodetool.compact(cql, table)
tasks, stats = get_compaction_stats(cql, table)
assert tasks == 0, f"Found {tasks} pending compaction tasks unexpectedly: stats={stats}"