Drop the AGPL license in favor of a source-available license. See the blog post [1] for details. [1] https://www.scylladb.com/2024/12/18/why-were-moving-to-a-source-available-license/
315 lines
13 KiB
Python
315 lines
13 KiB
Python
# Copyright 2023-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
|
|
#############################################################################
|
|
# Tests for sstable validation.
|
|
#
|
|
# Namely, detecting discrepancies between index and data files.
|
|
# These tests produce pairs of sstables that differ only slightly, mix the
# index/data from the two of them, then run `sstable validate`, expecting it
# to find the discrepancies.
|
|
#############################################################################
|
|
|
|
|
|
import glob
|
|
import json
|
|
import os
|
|
import pytest
|
|
import shutil
|
|
import subprocess
|
|
import tempfile
|
|
|
|
|
|
@pytest.fixture(scope="module")
def schema1_file():
    """Yield the path of a temporary schema file holding schema1.

    schema1 is a table with a partition key, a clustering key and a single
    regular text column. The temporary file lives for as long as the fixture
    is active.
    """
    schema_cql = "CREATE TABLE ks.tbl (pk int, ck int, v text, PRIMARY KEY (pk, ck))"
    with tempfile.NamedTemporaryFile("w+t") as schema_file:
        schema_file.write(schema_cql)
        # Flush so that whoever opens the file by path sees the full statement.
        schema_file.flush()
        yield schema_file.name
|
|
|
|
|
|
@pytest.fixture(scope="module")
def schema2_file():
    """Yield the path of a temporary schema file holding schema2.

    schema2 is a table with a partition key, a clustering key, a regular text
    column and an additional static text column. The temporary file lives for
    as long as the fixture is active.
    """
    schema_cql = "CREATE TABLE ks.tbl (pk int, ck int, v text, s text STATIC, PRIMARY KEY (pk, ck))"
    with tempfile.NamedTemporaryFile("w+t") as schema_file:
        schema_file.write(schema_cql)
        # Flush so that whoever opens the file by path sees the full statement.
        schema_file.flush()
        yield schema_file.name
|
|
|
|
|
|
@pytest.fixture(scope="module")
def large_rows():
    """Build 128 clustering rows (ck 0..127), each with a 1024-character value."""
    value_1k = 'v' * 1024
    return [
        {
            "type": "clustering-row",
            "key": {"raw": "0004{:08x}".format(ck)},
            "columns": {
                "v": {
                    "is_live": True,
                    "type": "regular",
                    "timestamp": 1680263186359882,
                    "value": value_1k,
                },
            },
        }
        for ck in range(128)
    ]
|
|
|
|
|
|
def find_component(generation, component_type, sst_dir):
    """Return the path of the one `component_type` file of `generation` in `sst_dir`.

    Files are matched with the glob `*-<generation>-big-<component_type>.db`.
    Exactly one file must match, otherwise the assertion fails.
    """
    pattern = os.path.join(sst_dir, f"*-{str(generation)}-big-{component_type}.db")
    matches = glob.glob(pattern)
    assert len(matches) == 1
    return matches[0]
|
|
|
|
|
|
@pytest.fixture(scope="module")
def sstable_cache(scylla_path):
    """Content-addressable sstable cache.

    Yields an object that writes sstables with `scylla sstable write` into a
    temporary store directory and remembers which generation was produced for
    which (schema file, JSON content) pair, so identical requests are served
    without running the tool again.
    """
    class cache:
        def __init__(self, scylla_path, workdir):
            self._scylla_path = scylla_path
            # Scratch file holding the JSON input of the sstable being written.
            self._in_json = os.path.join(workdir, "input.json")
            self._store_dir = os.path.join(workdir, "sst_store_dir")
            # Maps (schema_file, json content) -> generation number.
            self._cache = {}
            self._next_generation = 0

            os.mkdir(self._store_dir)

        @property
        def dir(self):
            """Directory in which the cached sstables are stored."""
            return self._store_dir

        def get_generation(self, sst, schema_file):
            """Return the generation of the sstable holding `sst`, writing it if needed.

            `sst` is the JSON-serializable sstable content and `schema_file` is the
            path passed to `scylla sstable write` as `--schema-file`.
            Raises subprocess.CalledProcessError if the write fails.
            """
            json_str = json.dumps(sst)
            # The sstable is written with a specific schema, so the schema has to be
            # part of the key: identical content requested with another schema must
            # not be served from an sstable written with a different one.
            key = (schema_file, json_str)
            generation = self._cache.get(key)
            if generation is not None:
                return generation
            with open(self._in_json, "w") as f:
                f.write(json_str)
            generation = self._next_generation
            # Consume the generation number even if the write below fails, so a
            # later attempt never collides with partially written files.
            self._next_generation = self._next_generation + 1
            subprocess.check_call([self._scylla_path, "sstable", "write", "--schema-file", schema_file, "--output-dir", self._store_dir, "--generation", str(generation), "--input-file", self._in_json])
            # Register only after a successful write, so a failure does not leave
            # behind a cache entry pointing at a non-existent sstable.
            self._cache[key] = generation
            return generation

        def copy_sstable_to(self, generation, target_dir):
            """Copy every component file of `generation` from the store into `target_dir`."""
            for f in glob.glob(os.path.join(self._store_dir, f"*-{str(generation)}-big-*.*")):
                shutil.copy(f, target_dir)

    with tempfile.TemporaryDirectory() as workdir:
        yield cache(scylla_path, workdir)
|
|
|
|
|
|
def validate_mixed_sstable_pair(ssta, sstb, scylla_path, sst_cache, sst_work_dir, schema_file, error_message):
    """Validate an sstable assembled from the components of two different sstables.

    All components of `ssta` are copied into a freshly recreated `sst_work_dir`,
    then its Index component is overwritten with the Index of `sstb`. The result
    is checked with `scylla sstable validate`.

    If `error_message` is empty, the sstable must be reported as valid.
    Otherwise it must be reported as invalid and `error_message` must appear
    in the tool's stderr.
    """
    gen_a = sst_cache.get_generation(ssta, schema_file)
    gen_b = sst_cache.get_generation(sstb, schema_file)

    # Start from an empty work directory so leftovers of earlier runs cannot interfere.
    shutil.rmtree(sst_work_dir)
    os.mkdir(sst_work_dir)
    sst_cache.copy_sstable_to(gen_a, sst_work_dir)

    # Replace the index of sstable A with that of sstable B.
    index_of_b = find_component(gen_b, "Index", sst_cache.dir)
    index_in_work_dir = find_component(gen_a, "Index", sst_work_dir)
    shutil.copyfile(index_of_b, index_in_work_dir)

    data_file = find_component(gen_a, "Data", sst_work_dir)
    cmd = [scylla_path, "sstable", "validate", "--schema-file", schema_file, data_file]
    res = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out = res.stdout.decode('utf-8')
    err = res.stderr.decode('utf-8')

    # Only the first entry of the report is inspected.
    report = json.loads(out)['sstables']
    first_entry = next(iter(report.values()))
    valid = first_entry['valid']

    if not error_message:
        print(err)
        assert valid
        return

    assert not valid
    assert error_message in err
|
|
|
|
|
|
def make_partition(pk, ck, v):
    """Build a partition with a single clustering row.

    `pk` is an index into a fixed list of raw partition keys, `ck` is the
    integer clustering key and `v` is the value of the regular column `v`.
    """
    # NOTE(review): the key order below is not numeric; presumably it follows
    # the partitioner's token order - confirm before changing it.
    raw_partition_keys = ["000400000001", "000400000000", "000400000002", "000400000003"]
    cell = {"is_live": True, "type": "regular", "timestamp": 1680263186359882, "value": v}
    row = {
        "type": "clustering-row",
        "key": {"raw": "0004{:08x}".format(ck)},
        "columns": {"v": cell},
    }
    return {"key": {"raw": raw_partition_keys[pk]}, "clustering_elements": [row]}
|
|
|
|
|
|
def make_large_partition(pk, rows, with_static_row = False):
    """Build a partition out of the given clustering `rows`.

    `pk` is an index into a fixed list of raw partition keys. When
    `with_static_row` is true, a static row with a 128-character value for
    column `s` is added as well.
    """
    raw_partition_keys = ["000400000001", "000400000000", "000400000002", "000400000003"]
    partition = {"key": {"raw": raw_partition_keys[pk]}}
    if with_static_row:
        # Inserted before the clustering elements to keep the serialized key order.
        partition["static_row"] = {
            "s": {"is_live": True, "type": "regular", "timestamp": 1680263186359882, "value": "s" * 128},
        }
    partition["clustering_elements"] = rows
    return partition
|
|
|
|
|
|
def test_scylla_sstable_validate_ok1(cql, scylla_path, temp_workdir, schema1_file, sstable_cache):
    """Data and index come from identical single-partition sstables: validation must pass."""
    data_sst = [make_partition(0, 0, 'v')]
    index_sst = [make_partition(0, 0, 'v')]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="")
|
|
|
|
|
|
def test_scylla_sstable_validate_ok2(cql, scylla_path, temp_workdir, schema1_file, sstable_cache):
    """Data and index come from identical three-partition sstables: validation must pass."""
    data_sst = [make_partition(pk, 0, 'v') for pk in (0, 1, 2)]
    index_sst = [make_partition(pk, 0, 'v') for pk in (0, 1, 2)]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="")
|
|
|
|
|
|
def test_scylla_sstable_validate_mismatching_partition(cql, scylla_path, temp_workdir, schema1_file, sstable_cache):
    """The index refers to a different partition key than the data: expect a partition mismatch."""
    data_sst = [make_partition(0, 0, 'v')]
    index_sst = [make_partition(1, 0, 'v')]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="mismatching index/data: partition mismatch")
|
|
|
|
|
|
def test_scylla_sstable_validate_mismatching_position(cql, scylla_path, temp_workdir, schema1_file, sstable_cache):
    """The index was built for a longer first-partition value than the data has: expect a position mismatch."""
    data_sst = [make_partition(0, 0, 'v'), make_partition(2, 0, 'v')]
    index_sst = [make_partition(0, 0, 'vv'), make_partition(2, 0, 'v')]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="mismatching index/data: position mismatch")
|
|
|
|
|
|
def test_scylla_sstable_validate_index_ends_before_data(cql, scylla_path, temp_workdir, schema1_file, sstable_cache):
    """The data has two partitions but the index only covers one: the index hits EOF first."""
    data_sst = [make_partition(0, 0, 'v'), make_partition(2, 0, 'v')]
    index_sst = [make_partition(0, 0, 'v')]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="mismatching index/data: index is at EOF, but data file has more data")
|
|
|
|
|
|
@pytest.mark.xfail(reason="index's EOF definition depends on data file size")
def test_scylla_sstable_validate_index_ends_before_data1(cql, scylla_path, temp_workdir, schema1_file, sstable_cache):
    """The data has one partition but the index covers two, the first with a longer value.

    The expected report is that the data file ends while the index still has
    entries. Marked xfail, see the reason on the marker.
    """
    data_sst = [make_partition(0, 0, 'v')]
    index_sst = [make_partition(0, 0, 'vvvvvvvvvvvvvvvvvvvvvv'), make_partition(2, 0, 'v')]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="mismatching index/data: data is at EOF, but index has more data")
|
|
|
|
|
|
@pytest.mark.xfail(reason="index's EOF definition depends on data file size")
def test_scylla_sstable_validate_index_ends_before_data2(cql, scylla_path, temp_workdir, schema1_file, sstable_cache):
    """The data has one partition but the index covers two, the first being identical.

    The expected report is that the data file ends while the index still has
    entries. Marked xfail, see the reason on the marker.
    """
    data_sst = [make_partition(0, 0, 'v')]
    index_sst = [make_partition(0, 0, 'v'), make_partition(2, 0, 'v')]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="mismatching index/data: data is at EOF, but index has more data")
|
|
|
|
def test_scylla_sstable_validate_large_rows_ok1(cql, scylla_path, temp_workdir, schema1_file, sstable_cache, large_rows):
    """Identical large partitions under schema1: validation must pass."""
    data_sst = [make_large_partition(0, large_rows)]
    index_sst = [make_large_partition(0, large_rows)]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="")
|
|
|
|
def test_scylla_sstable_validate_large_rows_ok2(cql, scylla_path, temp_workdir, schema2_file, sstable_cache, large_rows):
    """Identical large partitions without a static row, under schema2: validation must pass."""
    data_sst = [make_large_partition(0, large_rows)]
    index_sst = [make_large_partition(0, large_rows)]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema2_file,
        error_message="")
|
|
|
|
def test_scylla_sstable_validate_large_rows_ok3(cql, scylla_path, temp_workdir, schema2_file, sstable_cache, large_rows):
    """Identical large partitions with a static row, under schema2: validation must pass."""
    data_sst = [make_large_partition(0, large_rows, True)]
    index_sst = [make_large_partition(0, large_rows, True)]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema2_file,
        error_message="")
|
|
|
|
|
|
def test_scylla_sstable_validate_large_rows_mismatching_position1(cql, scylla_path, temp_workdir, schema1_file, sstable_cache, large_rows):
    """The index was built with a one-character first row in place of the large one.

    The expected report is a position mismatch in the promoted index.
    """
    # Same clustering key as the first large row, but with a one-character value.
    small_first_row = {
        "type": "clustering-row",
        "key": {"raw": "000400000000"},
        "columns": {"v": {"is_live": True, "type": "regular", "timestamp": 1680263186359882, "value": "v"}},
    }
    data_sst = [make_large_partition(0, large_rows)]
    index_sst = [make_large_partition(0, [small_first_row] + large_rows[1:])]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="mismatching index/data: position mismatch: promoted index:")
|
|
|
|
|
|
def test_scylla_sstable_validate_large_rows_mismatching_position2(cql, scylla_path, temp_workdir, schema2_file, sstable_cache, large_rows):
    """The data has a static row but the index was built without one.

    The expected report is a position mismatch in the promoted index.
    """
    data_sst = [make_large_partition(0, large_rows, True)]
    index_sst = [make_large_partition(0, large_rows)]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema2_file,
        error_message="mismatching index/data: position mismatch: promoted index:")
|
|
|
|
|
|
def test_scylla_sstable_validate_large_rows_mismatching_position3(cql, scylla_path, temp_workdir, schema2_file, sstable_cache, large_rows):
    """The data has no static row but the index was built with one.

    The expected report is a position mismatch in the promoted index.
    """
    data_sst = [make_large_partition(0, large_rows)]
    index_sst = [make_large_partition(0, large_rows, True)]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema2_file,
        error_message="mismatching index/data: position mismatch: promoted index:")
|
|
|
|
def test_scylla_sstable_validate_large_rows_mismathing_row(cql, scylla_path, temp_workdir, schema1_file, sstable_cache, large_rows):
    """Data and index differ only in the clustering key of the first row.

    The expected report is a clustering element mismatch.
    """
    shared_tail = large_rows[4:]
    data_sst = [make_large_partition(0, [large_rows[1]] + shared_tail)]
    index_sst = [make_large_partition(0, [large_rows[2]] + shared_tail)]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="mismatching index/data: clustering element")
|
|
|
|
def test_scylla_sstable_validate_large_rows_end_of_pi_not_end_of_rows(cql, scylla_path, temp_workdir, schema1_file, sstable_cache, large_rows):
    """The data partition has a single small row while the index covers all the large rows.

    The expected report is that the promoted index has more blocks although
    the partition has ended.
    """
    single_small_row = {
        "type": "clustering-row",
        "key": {"raw": "000400000000"},
        "columns": {"v": {"is_live": True, "type": "regular", "timestamp": 1680263186359882, "value": "v"}},
    }
    data_sst = [make_large_partition(0, [single_small_row]), make_partition(2, 0, 'v')]
    index_sst = [make_large_partition(0, large_rows), make_partition(2, 0, 'v')]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="mismatching index/data: promoted index has more blocks, but it is end of partition")
|
|
|
|
def test_scylla_sstable_validate_large_rows_end_of_rows_not_end_of_pi(cql, scylla_path, temp_workdir, schema1_file, sstable_cache, large_rows):
    """The data partition has all the large rows while the index covers only the first 96.

    The expected report is that the promoted index has no more blocks although
    the partition continues.
    """
    data_sst = [make_large_partition(0, large_rows), make_partition(2, 0, 'v')]
    index_sst = [make_large_partition(0, large_rows[:96]), make_partition(2, 0, 'v')]
    validate_mixed_sstable_pair(
        ssta=data_sst,
        sstb=index_sst,
        scylla_path=scylla_path,
        sst_cache=sstable_cache,
        sst_work_dir=temp_workdir,
        schema_file=schema1_file,
        error_message="mismatching index/data: promoted index has no more blocks, but partition")
|