mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-22 09:30:45 +00:00
3f7ee3ce5d introduced system.batchlog_v2, with a schema designed to speed up batchlog replays and make post-replay cleanups much more effective. It did not introduce a cluster feature for the new table, because it is a node-local table, so the cluster can switch to the new table gradually, one node at a time. However, https://github.com/scylladb/scylladb/issues/27886 showed that the switching causes timeouts during upgrades, in mixed clusters. Furthermore, switching to the new table unconditionally on upgraded nodes means that on rollback, the batches saved into the v2 table are lost. This PR re-introduces v1 (`system.batchlog`) support and guards the use of the v2 table with a cluster feature, so mixed clusters keep using v1 and thus stay rollback-compatible. The re-introduced v1 support doesn't support post-replay cleanups, for simplicity. The cleanup in v1 was never particularly effective anyway and we ended up disabling it for heavy batchlog users, so I don't think the lack of support for cleanup is a problem. 
Fixes: https://github.com/scylladb/scylladb/issues/27886 Needs backport to 2026.1, to fix upgrades for clusters using batches Closes scylladb/scylladb#28736 * github.com:scylladb/scylladb: test/boost/batchlog_manager_test: add tests for v1 batchlog test/boost/batchlog_manager_test: make prepare_batches() work with both v1 and v2 test/boost/batchlog_manager_test: fix indentation test/boost/batchlog_manager_test: extract prepare_batches() method test/lib/cql_assertions: is_rows(): add dump parameter tools/scylla-sstable: extract query result printers tools/scylla-sstable: add std::ostream& arg to query result printers repair/row_level: repair_flush_hints_batchlog_handler(): add all_replayed to finish log db/batchlog_manager: re-add v1 support db/batchlog_manager: return all_replayed from process_batch() db/batchlog_manager: process_bath() fix indentation db/batchlog_manager: make batch() a standalone function db/batchlog_manager: make structs stats public db/batchlog_manager: allocate limiter on the stack db/batchlog_manager: add feature_service dependency gms/feature_service: add batchlog_v2 feature (cherry picked from commita83ee6cf66) Closes scylladb/scylladb#28853
282 lines
8.8 KiB
C++
282 lines
8.8 KiB
C++
/*
|
|
* Copyright (C) 2015-present ScyllaDB
|
|
*
|
|
* Modified by ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
|
*/
|
|
|
|
#include <cstdint>
|
|
#include "types/json_utils.hh"
|
|
#include "utils/assert.hh"
|
|
#include "utils/hashers.hh"
|
|
#include "utils/rjson.hh"
|
|
#include "cql3/result_set.hh"
|
|
|
|
namespace cql3 {
|
|
|
|
// Builds result-set metadata from a list of column specifications.
// When every column belongs to the same table, the GLOBAL_TABLES_SPEC
// flag is set so the table spec need only be serialized once.
metadata::metadata(std::vector<lw_shared_ptr<column_specification>> names_)
    : _flags(flag_enum_set())
    , _column_info(make_lw_shared<column_info>(std::move(names_)))
{
    const auto& names = _column_info->_names;
    const bool single_table = !names.empty() && column_specification::all_in_same_table(names);
    if (single_table) {
        _flags.set<flag::GLOBAL_TABLES_SPEC>();
    }
}
|
|
|
|
// Builds metadata with explicit flags, a serialized-column count that may
// be smaller than the number of names (see add_non_serialized_column()),
// and an optional paging state.
metadata::metadata(flag_enum_set flags, std::vector<lw_shared_ptr<column_specification>> names_, uint32_t column_count,
        lw_shared_ptr<const service::pager::paging_state> paging_state)
    : _flags(flags)
    , _column_info(make_lw_shared<column_info>(std::move(names_), column_count))
    , _paging_state(std::move(paging_state))
{
    if (_column_info->_names.empty()) {
        return;
    }
    if (column_specification::all_in_same_table(_column_info->_names)) {
        _flags.set<flag::GLOBAL_TABLES_SPEC>();
    }
}
|
|
|
|
// The maximum number of values that the ResultSet can hold. This can be bigger than columnCount due to CASSANDRA-4911
|
|
uint32_t metadata::value_count() const {
|
|
return _flags.contains<flag::NO_METADATA>() ? _column_info->_column_count : _column_info->_names.size();
|
|
}
|
|
|
|
// Appends a column specification without bumping _column_count, so the
// new column is never serialized to the client (see value_count()).
void metadata::add_non_serialized_column(lw_shared_ptr<column_specification> name) {
    auto& names = _column_info->_names;
    names.emplace_back(std::move(name));
}
|
|
|
|
void metadata::hide_last_column() {
|
|
if (_column_info->_column_count == 0) {
|
|
utils::on_internal_error("Trying to hide a column when there are no columns visible.");
|
|
}
|
|
_column_info->_column_count--;
|
|
}
|
|
|
|
// Stores the paging state and flags the result as having more pages.
void metadata::set_paging_state(lw_shared_ptr<const service::pager::paging_state> paging_state) {
    _paging_state = std::move(paging_state);
    _flags.set<flag::HAS_MORE_PAGES>();
}
|
|
|
|
// Like set_paging_state(), but only keeps the state when it reports
// remaining rows; otherwise any previously recorded paging state is
// cleared along with the HAS_MORE_PAGES flag.
void metadata::maybe_set_paging_state(lw_shared_ptr<const service::pager::paging_state> paging_state) {
    SCYLLA_ASSERT(paging_state);
    const bool has_more = paging_state->get_remaining() > 0;
    if (!has_more) {
        _flags.remove<flag::HAS_MORE_PAGES>();
        _paging_state = nullptr;
    } else {
        set_paging_state(std::move(paging_state));
    }
}
|
|
|
|
// Sets the NO_METADATA flag; see value_count() for how it changes which
// column count is reported.
void metadata::set_skip_metadata() {
    _flags.set<flag::NO_METADATA>();
}
|
|
|
|
// Returns the current flag set by value.
metadata::flag_enum_set metadata::flags() const {
    return _flags;
}
|
|
|
|
// Returns the stored paging state (may be null when there are no more
// pages).
lw_shared_ptr<const service::pager::paging_state> metadata::paging_state() const {
    return _paging_state;
}
|
|
|
|
// Metadata_id is a checksum computed from given metadata to track schema changes in prepared statements.
|
|
// Originally introduced in CQLv5.
|
|
cql3::cql_metadata_id_type metadata::calculate_metadata_id() const {
|
|
auto h = sha256_hasher();
|
|
for (uint32_t i = 0; i < _column_info->_column_count; ++i) {
|
|
feed_hash(h, _column_info->_names[i]->name->name());
|
|
feed_hash(h, _column_info->_names[i]->type->name());
|
|
}
|
|
// Return first 16 bytes to have the same length as Cassandra's MD5
|
|
return cql_metadata_id_type(h.finalize().substr(0, 16));
|
|
}
|
|
|
|
prepared_metadata::prepared_metadata(const std::vector<lw_shared_ptr<column_specification>>& names,
|
|
const std::vector<uint16_t>& partition_key_bind_indices,
|
|
bool is_conditional)
|
|
: _names{names}
|
|
, _partition_key_bind_indices{partition_key_bind_indices}
|
|
{
|
|
if (!names.empty() && column_specification::all_in_same_table(_names)) {
|
|
_flags.set<flag::GLOBAL_TABLES_SPEC>();
|
|
}
|
|
|
|
if (is_conditional) {
|
|
_flags.set<flag::LWT>();
|
|
}
|
|
}
|
|
|
|
// Returns the current flag set by value.
prepared_metadata::flag_enum_set prepared_metadata::flags() const {
    return _flags;
}
|
|
|
|
// Returns the bind-variable column specifications.
const std::vector<lw_shared_ptr<column_specification>>& prepared_metadata::names() const {
    return _names;
}
|
|
|
|
const std::vector<uint16_t>& prepared_metadata::partition_key_bind_indices() const {
|
|
return _partition_key_bind_indices;
|
|
}
|
|
|
|
// Builds an empty result set whose columns are described by the given
// specifications.
result_set::result_set(std::vector<lw_shared_ptr<column_specification>> metadata_)
    : _metadata(::make_shared<metadata>(std::move(metadata_)))
{ }
|
|
|
|
// Builds an empty result set that shares an existing metadata object.
result_set::result_set(::shared_ptr<metadata> metadata)
    : _metadata(std::move(metadata))
{ }
|
|
|
|
size_t result_set::size() const {
|
|
return _rows.size();
|
|
}
|
|
|
|
bool result_set::empty() const {
|
|
return _rows.empty();
|
|
}
|
|
|
|
// Appends a complete row; the row must carry exactly one value per
// column (value_count()).
void result_set::add_row(std::vector<managed_bytes_opt> row) {
    SCYLLA_ASSERT(row.size() == _metadata->value_count());
    _rows.emplace_back(std::move(row));
}
|
|
|
|
void result_set::add_row(std::vector<bytes_opt> row) {
|
|
row_type new_row;
|
|
new_row.reserve(row.size());
|
|
for (auto& bo : row) {
|
|
new_row.emplace_back(bo ? managed_bytes_opt(*bo) : managed_bytes_opt());
|
|
}
|
|
add_row(std::move(new_row));
|
|
}
|
|
|
|
// Appends a single cell, opening a fresh row whenever the current row
// already holds one value per column.
void result_set::add_column_value(managed_bytes_opt value) {
    const auto row_width = _metadata->value_count();
    const bool need_new_row = _rows.empty() || _rows.back().size() == row_width;
    if (need_new_row) {
        std::vector<managed_bytes_opt> fresh;
        fresh.reserve(row_width);
        _rows.emplace_back(std::move(fresh));
    }

    _rows.back().emplace_back(std::move(value));
}
|
|
|
|
// Linear-buffer convenience overload: converts and forwards to the
// managed_bytes_opt overload.
void result_set::add_column_value(bytes_opt value) {
    add_column_value(to_managed_bytes_opt(value));
}
|
|
|
|
// Reverses the order of the rows in place.
void result_set::reverse() {
    std::reverse(_rows.begin(), _rows.end());
}
|
|
|
|
void result_set::trim(size_t limit) {
|
|
if (_rows.size() > limit) {
|
|
_rows.resize(limit);
|
|
}
|
|
}
|
|
|
|
// Mutable access to the result-set metadata.
metadata& result_set::get_metadata() {
    return *_metadata;
}
|
|
|
|
// Read-only access to the result-set metadata.
const metadata& result_set::get_metadata() const {
    return *_metadata;
}
|
|
|
|
// Read-only access to the stored rows.
const utils::chunked_vector<std::vector<managed_bytes_opt>>& result_set::rows() const {
    return _rows;
}
|
|
|
|
// Returns a shared, immutable metadata object describing zero columns,
// with skip-metadata set. Cached per thread so repeated callers reuse
// the same instance.
shared_ptr<const cql3::metadata>
make_empty_metadata() {
    static thread_local shared_ptr<const metadata> cached_empty;
    if (!cached_empty) {
        auto fresh = ::make_shared<metadata>(std::vector<lw_shared_ptr<cql3::column_specification>>{});
        fresh->set_skip_metadata();
        cached_empty = std::move(fresh);
    }
    return cached_empty;
}
|
|
|
|
// Renders a query result to `os` as an ASCII table: a header row, a
// dashed separator line, then one line per data row. Column widths are
// computed from the widest value in each column; headers are
// left-aligned and values right-aligned.
void print_query_results_text(std::ostream& os, const cql3::result& result) {
    const auto& metadata = result.get_metadata();
    const auto& column_metadata = metadata.get_names();

    // Per-column accumulator: collects the header plus all cell values,
    // tracking the maximum width for alignment.
    struct column_values {
        size_t max_size{0};
        sstring header_format;
        sstring row_format;
        std::vector<sstring> values;

        void add(sstring value) {
            max_size = std::max(max_size, value.size());
            values.push_back(std::move(value));
        }
    };

    std::vector<column_values> columns;
    columns.resize(column_metadata.size());

    // Entry 0 of each column's values is its header (the column name).
    for (size_t i = 0; i < column_metadata.size(); ++i) {
        columns[i].add(column_metadata[i]->name->text());
    }

    for (const auto& row : result.result_set().rows()) {
        for (size_t i = 0; i < row.size(); ++i) {
            if (row[i]) {
                columns[i].add(column_metadata[i]->type->to_string(linearized(managed_bytes_view(*row[i]))));
            } else {
                // Null cells render as an empty string.
                columns[i].add("");
            }
        }
    }

    // Now that final widths are known, build the per-column format
    // strings and separator segments.
    std::vector<sstring> separators(columns.size(), sstring());
    for (size_t i = 0; i < columns.size(); ++i) {
        auto& col_values = columns[i];
        col_values.header_format = seastar::format(" {{:<{}}} ", col_values.max_size);
        col_values.row_format = seastar::format(" {{:>{}}} ", col_values.max_size);
        // Fill-construct the dashes instead of appending one at a time.
        separators[i] = sstring(col_values.max_size, '-');
    }

    // Emit the header (r == 0) followed by the data rows, inserting the
    // separator line right after the header.
    for (size_t r = 0; r < result.result_set().rows().size() + 1; ++r) {
        std::vector<sstring> row;
        row.reserve(columns.size());
        for (size_t i = 0; i < columns.size(); ++i) {
            const auto& format = r == 0 ? columns[i].header_format : columns[i].row_format;
            row.push_back(fmt::format(fmt::runtime(std::string_view(format)), columns[i].values[r]));
        }
        fmt::print(os, "{}\n", fmt::join(row, "|"));
        if (!r) {
            fmt::print(os, "-{}-\n", fmt::join(separators, "-+-"));
        }
    }
}
|
|
|
|
// Renders a query result to `os` as a JSON array of objects, one object
// per row, keyed by column name. Output is produced incrementally via a
// streaming writer, so the whole document is never buffered.
void print_query_results_json(std::ostream& os, const cql3::result& result) {
    const auto& metadata = result.get_metadata();
    const auto& column_metadata = metadata.get_names();

    rjson::streaming_writer writer(os);

    writer.StartArray();
    for (const auto& row : result.result_set().rows()) {
        writer.StartObject();
        for (size_t i = 0; i < row.size(); ++i) {
            writer.Key(column_metadata[i]->name->text());
            // Missing and empty cells are both emitted as JSON null.
            if (!row[i] || row[i]->empty()) {
                writer.Null();
                continue;
            }
            // The cell is pre-serialized to JSON text by its CQL type,
            // then written raw along with the corresponding JSON type.
            const auto value = to_json_string(*column_metadata[i]->type, *row[i]);
            const auto type = to_json_type(*column_metadata[i]->type, *row[i]);
            writer.RawValue(value, type);
        }
        writer.EndObject();
    }
    writer.EndArray();
}
|
|
|
|
}
|