Files
scylladb/query-result-writer.hh
Tomasz Grabiec 9724b84bb3 db: Fix query of partitions with no live clustered rows
When partition has no live regular rows, but has some data live in the
static row, then it should appear in the results, even though we
didn't select any static column.

To reproduce:

  create table cf (k blob, c blob, v blob, s1 blob static, primary key (k, c));
  update cf set s1 = 0x01 where k = 0x01;
  update cf set s1 = 0x02 where k = 0x02;
  select k from cf;

The "select" statement should return 2 rows, but was returning 0.

The following query worked fine, because static columns were included:

  select * from cf;

The data query should contain only live data, so we shouldn't write a
partition entry if it's supposed to be absent from the results. We
can't tell that though until we've processed all the data. To solve
this problem, query result writer is using an optimistic approach,
where the partition header will be retracted from the buffer
(cheaply), if it turns out there's no live data in it.
2015-07-09 19:55:00 +02:00

166 lines
4.3 KiB
C++

/*
* Copyright 2015 Cloudius Systems
*/
#pragma once
#include "types.hh"
#include "atomic_cell.hh"
#include "query-request.hh"
#include "query-result.hh"
// Refer to query-result.hh for the query result format
namespace query {
// Serializes the cells of one row into the query result buffer.
//
// Each cell is preceded by a one-byte presence marker. The size place
// holder handed to the constructor is filled in by finish() with the
// number of bytes appended to the buffer since this writer was created.
//
// finish() must be called before destruction; the destructor asserts it.
class result::row_writer {
    bytes_ostream& _w;
    const partition_slice& _slice;
    bytes_ostream::place_holder<uint32_t> _size_ph;
    size_t _start_pos;
    bool _finished = false;

    // Emits the one-byte marker telling whether a cell value follows.
    // FIXME: store this in a bitmap
    void write_presence(bool present) {
        _w.write<int8_t>(present);
    }
public:
    // slice   - consulted for the options controlling what gets written
    // w       - output buffer; its current size marks the start of the row
    // size_ph - place holder which finish() fills with the row size
    row_writer(
        const partition_slice& slice,
        bytes_ostream& w,
        bytes_ostream::place_holder<uint32_t> size_ph)
        : _w(w)
        , _slice(slice)
        , _size_ph(size_ph)
        , _start_pos(w.size())
    { }

    ~row_writer() {
        assert(_finished);
    }

    // Records a cell which has no value.
    void add_empty() {
        write_presence(false);
    }

    // Records a live atomic cell. When the slice requests
    // send_timestamp_and_expiry, the timestamp and the expiry are written
    // ahead of the value; a cell without TTL gets the maximum
    // representable expiry.
    void add(::atomic_cell_view c) {
        write_presence(true);
        assert(c.is_live());
        const bool with_metadata =
            _slice.options.contains<partition_slice::option::send_timestamp_and_expiry>();
        if (with_metadata) {
            _w.write(c.timestamp());
            const gc_clock::rep expiry = c.is_live_and_has_ttl()
                ? c.expiry().time_since_epoch().count()
                : std::numeric_limits<gc_clock::rep>::max();
            _w.write<gc_clock::rep>(expiry);
        }
        _w.write_blob(c.value());
    }

    // Records a collection cell as a blob holding the view's data.
    void add(collection_mutation::view v) {
        write_presence(true);
        _w.write_blob(v.data);
    }

    // Fills the size place holder with the number of bytes written since
    // construction and marks this writer as done. The size must fit in
    // 32 bits.
    void finish() {
        const auto row_size = _w.size() - _start_pos;
        const auto encoded_size = static_cast<uint32_t>(row_size);
        assert(encoded_size == row_size);
        _w.set(_size_ph, encoded_size);
        _finished = true;
    }
};
// Writes the contents of one partition into the query result buffer.
//
// Call finish() or retract() when done; the destructor asserts that one
// of them was called.
class result::partition_writer {
    bytes_ostream& _w;
    const partition_slice& _slice;
    bytes_ostream::place_holder<uint32_t> _count_ph;
    bytes_ostream::position _pos;
    uint32_t _row_count = 0;
    bool _static_row_added = false;
    bool _finished = false;
public:
    // slice    - consulted for the options controlling what gets written
    // count_ph - place holder which finish() fills with the row count
    // pos      - buffer position to which retract() rewinds
    // w        - output buffer
    partition_writer(
        const partition_slice& slice,
        bytes_ostream::place_holder<uint32_t> count_ph,
        bytes_ostream::position pos,
        bytes_ostream& w)
        : _w(w)
        , _slice(slice)
        , _count_ph(count_ph)
        , _pos(pos)
    { }

    ~partition_writer() {
        assert(_finished);
    }

    // Starts a clustered row and returns the writer for its cells. The
    // clustering key is written only when the slice requests
    // send_clustering_key.
    row_writer add_row(const clustering_key& key) {
        const bool send_key =
            _slice.options.contains<partition_slice::option::send_clustering_key>();
        if (send_key) {
            _w.write_blob(key);
        }
        ++_row_count;
        auto row_size_ph = _w.write_place_holder<uint32_t>();
        return row_writer(_slice, _w, row_size_ph);
    }

    // Starts the static row and returns the writer for its cells.
    // Call before any add_row(), and at most once.
    row_writer add_static_row() {
        assert(!_static_row_added); // Static row can be added only once
        assert(!_row_count); // Static row must be added before clustered rows
        _static_row_added = true;
        auto row_size_ph = _w.write_place_holder<uint32_t>();
        return row_writer(_slice, _w, row_size_ph);
    }

    // Number of rows accounted to this partition so far. After finish()
    // it is at least 1, after retract() it is 0.
    uint32_t row_count() const {
        return _row_count;
    }

    // Commits the partition: stores the number of add_row() calls in the
    // count place holder and marks this writer as done.
    void finish() {
        // Store the count first; the adjustment below affects only what
        // row_count() reports, not what is serialized.
        _w.set(_count_ph, _row_count);
        // The partition is live. If there are no clustered rows, there
        // must be something live in the static row, which counts as one row.
        if (_row_count == 0) {
            _row_count = 1;
        }
        _finished = true;
    }

    // Drops the partition: rewinds the buffer to the position given at
    // construction, resets the row count and marks this writer as done.
    void retract() {
        _row_count = 0;
        _w.retract(_pos);
        _finished = true;
    }

    const partition_slice& slice() const {
        return _slice;
    }
};
// Accumulates a query result in an owned buffer, one partition at a time.
class result::builder {
    bytes_ostream _w;
    const partition_slice& _slice;
public:
    // The slice is held by reference and consulted for the options
    // controlling what gets written.
    builder(const partition_slice& slice) : _slice(slice) { }

    // Starts a new partition and returns a writer for its contents.
    // Invalidates all previously obtained writers.
    partition_writer add_partition(const partition_key& key) {
        // Captured before anything is written for this partition, so that
        // the writer can rewind to it on retract().
        const auto partition_start = _w.pos();
        auto row_count_ph = _w.write_place_holder<uint32_t>();
        const bool send_key =
            _slice.options.contains<partition_slice::option::send_partition_key>();
        if (send_key) {
            _w.write_blob(key);
        }
        return partition_writer(_slice, row_count_ph, partition_start, _w);
    }

    // Moves the accumulated buffer into a result object.
    result build() {
        return result(std::move(_w));
    }

    const partition_slice& slice() const {
        return _slice;
    }
};
}