Merge "Properly write static rows missing columns for SSTables 3.x." from Vladimir

Before this fix, write_missing_columns() helper would always deal with
regular columns even when writing static rows.

This would cause errors on reading those files.

Now, the missing columns are written correctly for regular and static
rows alike.

* github.com/argenet/scylla.git projects/sstables-30/fix-writing-static-missing-columns/v1:
  schema: Add helper method returning the count of columns of specified
    kind.
  sstables: Honour the column kind when writing missing columns in 'mc'
    format.
  tests: Add test for a static row with missing columns (SStables 3.x.).
This commit is contained in:
Tomasz Grabiec
2018-10-26 09:05:56 +02:00
11 changed files with 57 additions and 8 deletions

View File

@@ -1121,6 +1121,21 @@ schema::has_static_columns() const {
return !static_columns().empty();
}
column_count_type
schema::columns_count(column_kind kind) const {
switch (kind) {
case column_kind::partition_key:
return partition_key_size();
case column_kind::clustering_key:
return clustering_key_size();
case column_kind::static_column:
return static_columns_count();
case column_kind::regular_column:
return regular_columns_count();
default:
std::abort();
}
}
column_count_type
schema::partition_key_size() const {
return column_offset(column_kind::clustering_key);

View File

@@ -701,6 +701,7 @@ public:
bool is_last_partition_key(const column_definition& def) const;
bool has_multi_cell_collections() const;
bool has_static_columns() const;
column_count_type columns_count(column_kind kind) const;
column_count_type partition_key_size() const;
column_count_type clustering_key_size() const;
column_count_type static_columns_count() const;

View File

@@ -204,6 +204,7 @@ class missing_columns_input_range
: public input_range_base<missing_columns_input_range, uint64_t> {
private:
const schema& _schema;
const column_kind _kind;
const row& _row;
mutable uint64_t _current_value = 0;
mutable column_id _current_id = 0;
@@ -216,12 +217,15 @@ private:
} _mode;
public:
missing_columns_input_range(const schema& s, const row& row)
missing_columns_input_range(const schema& s, column_kind kind, const row& row)
: _schema(s)
, _kind(kind)
, _row(row) {
assert(kind == column_kind::static_column || kind == column_kind::regular_column);
auto row_size = _row.size();
auto total_size = _schema.regular_columns_count();
auto total_size = _schema.columns_count(_kind);
_current_id = row_size < total_size ? 0 : total_size;
_mode = (total_size < 64) ? encoding_mode::small :
@@ -230,7 +234,7 @@ public:
}
bool next() const {
auto total_size = _schema.regular_columns_count();
auto total_size = _schema.columns_count(_kind);
if (_current_id == total_size) {
// No more values to encode
return false;
@@ -285,12 +289,12 @@ public:
explicit operator bool() const
{
return (_current_id < _schema.regular_columns_count());
return (_current_id < _schema.columns_count(_kind));
}
};
void write_missing_columns(file_writer& out, const schema& s, const row& row) {
for (const auto value: missing_columns_input_range{s, row}) {
void write_missing_columns(file_writer& out, const schema& s, column_kind kind, const row& row) {
for (const auto value: missing_columns_input_range{s, kind, row}) {
write_vint(out, value);
}
}

View File

@@ -75,7 +75,7 @@ void write_clustering_prefix(file_writer& out, const schema& s,
const clustering_key_prefix& prefix, ephemerally_full_prefix is_ephemerally_full);
// Writes encoded information about missing columns in the given row
void write_missing_columns(file_writer& out, const schema& s, const row& row);
void write_missing_columns(file_writer& out, const schema& s, column_kind kind, const row& row);
// Helper functions for writing delta-encoded time-related values
void write_delta_timestamp(file_writer& out, api::timestamp_type timestamp, const encoding_stats& enc_stats);

View File

@@ -3209,7 +3209,7 @@ void sstable_writer_m::write_cells(file_writer& writer, column_kind kind, const
// This differs from Origin where all updated columns are tracked and the set of filled columns of a row
// is compared with the set of all columns filled in the memtable. So our encoding may be less optimal in some cases
// but still valid.
write_missing_columns(writer, _schema, row_body);
write_missing_columns(writer, _schema, kind, row_body);
row_body.for_each_cell([this, &writer, kind, &properties, has_complex_deletion] (column_id id, const atomic_cell_or_collection& c) {
auto&& column_definition = _schema.column_at(kind, id);
if (!column_definition.is_atomic()) {

View File

@@ -4639,3 +4639,31 @@ SEASTAR_THREAD_TEST_CASE(test_regular_and_shadowable_deletion) {
validate_read(s, tmp.path, {mut1, mut2});
}
SEASTAR_THREAD_TEST_CASE(test_write_static_row_with_missing_columns) {
auto abj = defer([] { await_background_jobs().get(); });
sstring table_name = "static_row_with_missing_columns";
// CREATE TABLE static_row (pk int, ck int, st1 int static, st2 int static, rc int, PRIMARY KEY (pk, ck)) WITH compression = {'sstable_compression': ''};
schema_builder builder("sst3", table_name);
builder.with_column("pk", int32_type, column_kind::partition_key);
builder.with_column("ck", int32_type, column_kind::clustering_key);
builder.with_column("st1", int32_type, column_kind::static_column);
builder.with_column("st2", int32_type, column_kind::static_column);
builder.with_column("rc", int32_type);
builder.set_compressor_params(compression_parameters());
schema_ptr s = builder.build(schema_builder::compact_storage::no);
lw_shared_ptr<memtable> mt = make_lw_shared<memtable>(s);
// INSERT INTO static_row (pk, ck, st1, rc) VALUES (0, 1, 2, 3);
auto key = partition_key::from_deeply_exploded(*s, {0});
mutation mut{s, key};
clustering_key ckey = clustering_key::from_deeply_exploded(*s, { 1 });
mut.partition().apply_insert(*s, ckey, write_timestamp);
mut.set_static_cell("st1", data_value{2}, write_timestamp);
mut.set_cell(ckey, "rc", data_value{3}, write_timestamp);
mt->apply(mut);
tmpdir tmp = write_and_compare_sstables(s, mt, table_name);
validate_read(s, tmp.path, {mut});
}