sstables: fix a use-after-free in key_view::explode()

key_view::explode() contains a blatant use-after-free:
unless the input is already linearized, the views it returns point into a local
temporary buffer that no longer exists by the time explode() returns.

The bug rarely triggers, because partition keys are usually not large enough to
be fragmented. For a sufficiently large key, however, it results in a corrupted
partition_key down the line.

Fixes #17625

Closes scylladb/scylladb#17626
This commit is contained in:
Michał Chojnowski
2024-03-04 19:14:04 +01:00
committed by Botond Dénes
parent 7631605892
commit f9e97fa632
4 changed files with 25 additions and 10 deletions

View File

@@ -29,14 +29,10 @@ public:
return ::with_linearized(_bytes, func);
}
std::vector<bytes_view> explode(const schema& s) const {
return with_linearized([&] (bytes_view v) {
return composite_view(v, s.partition_key_size() > 1).explode();
});
}
partition_key to_partition_key(const schema& s) const {
return partition_key::from_exploded_view(explode(s));
return with_linearized([&] (bytes_view v) {
return partition_key::from_exploded_view(composite_view(v, s.partition_key_size() > 1).explode());
});
}
bool operator==(const key_view& k) const = default;

View File

@@ -412,7 +412,7 @@ public:
if (!_is_mutation_end) {
return proceed::yes;
}
auto pk = partition_key::from_exploded(key.explode(*_schema));
auto pk = key.to_partition_key(*_schema);
setup_for_partition(pk);
auto dk = dht::decorate_key(*_schema, pk);
_reader->on_next_partition(std::move(dk), tombstone(deltime));

View File

@@ -309,7 +309,7 @@ public:
if (!_is_mutation_end) {
return data_consumer::proceed::yes;
}
auto pk = partition_key::from_exploded(key.explode(*_schema));
auto pk = key.to_partition_key(*_schema);
setup_for_partition(pk);
auto dk = dht::decorate_key(*_schema, pk);
_reader->on_next_partition(std::move(dk), tombstone(deltime));
@@ -1941,7 +1941,7 @@ public:
}
data_consumer::proceed consume_partition_start(sstables::key_view key, sstables::deletion_time deltime) {
auto pk = partition_key::from_exploded(key.explode(*_schema));
auto pk = key.to_partition_key(*_schema);
auto dk = dht::decorate_key(*_schema, pk);
_current_pos = position_in_partition(position_in_partition::partition_start_tag_t{});
sstlog.trace("validating_consumer {}: {}({}) _expected_pkey={}", fmt::ptr(this), __FUNCTION__, pk, _expected_pkey);

View File

@@ -14,6 +14,7 @@
import pytest
from util import unique_name, new_test_table
import nodetool
import random
# Reproduces issue #8138, where the sstable reader in a TWCS sstable set
# had a bug and resulted in no results for queries.
@@ -36,3 +37,21 @@ def test_twcs_optimal_query_path(cql, test_keyspace, scylla_only):
# in the debug build.
nodetool.flush(cql, table)
assert 1 == len(list(cql.execute(f"SELECT * FROM {table} WHERE pk = 0 BYPASS CACHE")))
# Reproduces https://github.com/scylladb/scylladb/issues/17625.
# Without the fix, it trips ASAN.
def test_big_key_index_reader(cql, test_keyspace, scylla_only):
    """Write one partition with a very large partition key, flush it, and read it back.

    The partition key is a 30000-byte random blob, and ten clustering rows
    with equally large random clustering keys are inserted under it. After
    flushing, the partition is read back with BYPASS CACHE. The test makes
    no assertion on the result; it only requires the read to complete.
    """
    blob_size = 30000
    with new_test_table(cql, test_keyspace, "pk blob, ck blob, PRIMARY KEY (pk, ck)") as table:
        # A partition key large enough to be fragmented in LSA, plus enough
        # clustering rows for a promoted index to be written.
        insert_stmt = cql.prepare(f"INSERT INTO {table}(pk, ck) VALUES(?, ?)")
        pk = random.randbytes(blob_size)
        for _ in range(10):
            cql.execute(insert_stmt, [pk, random.randbytes(blob_size)])

        # Push the partition out to an sstable.
        nodetool.flush(cql, table)

        # Read the partition back from sstables, skipping the cache.
        select_stmt = cql.prepare(f"SELECT pk, ck FROM {table} WHERE pk = ? BYPASS CACHE")
        cql.execute(select_stmt, [pk])