`linearizing_input_stream` allows transparently reading linearized values from a fragmented buffer. This is done by linearizing on-the-fly only those read values that happen to be split across multiple fragments. This reduces the size of the largest allocation from the size of the entire buffer (when the entire buffer is linearized) to the size of the largest read value. This is a huge gain when the buffer contains loads of small objects, and modest gains when the buffer contains few large objects. But the even in the worst case the size of the largest allocation will be less or equal compared to the case where the entire buffer is linearized. This stream is planned to be used as glue code between the fragmented cell value and the collection deserialization code which expects to be reading linearized values.
135 lines
3.7 KiB
C++
135 lines
3.7 KiB
C++
/*
|
|
* Copyright (C) 2019 ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* This file is part of Scylla.
|
|
*
|
|
* Scylla is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Scylla is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <list>
|
|
#include <fmt/format.h>
|
|
#include <seastar/net/byteorder.hh>
|
|
#include <seastar/util/backtrace.hh>
|
|
|
|
#include "utils/fragment_range.hh"
|
|
|
|
namespace utils {
|
|
|
|
// Facilitates transparently reading from a fragmented range.
|
|
template<typename T, typename Exception = std::runtime_error>
|
|
GCC6_CONCEPT(
|
|
requires FragmentRange<T>
|
|
)
|
|
class linearizing_input_stream {
|
|
using iterator = typename T::iterator;
|
|
using fragment_type = typename T::fragment_type;
|
|
|
|
private:
|
|
iterator _it;
|
|
iterator _end;
|
|
fragment_type _current;
|
|
size_t _size;
|
|
// We need stable addresses for the `bytes`, which, due to the small
|
|
// value optimization, can invalidate any attached bytes_view on move.
|
|
std::list<bytes> _linearized_values;
|
|
|
|
private:
|
|
size_t remove_current_prefix(size_t size) {
|
|
if (size < _current.size()) {
|
|
_current.remove_prefix(size);
|
|
_size -= size;
|
|
return size;
|
|
}
|
|
const auto ret = _current.size();
|
|
_size -= ret;
|
|
++_it;
|
|
_current = (_it == _end) ? fragment_type{} : *_it;
|
|
return ret;
|
|
}
|
|
|
|
void check_size(size_t size) const {
|
|
if (size > _size) {
|
|
seastar::throw_with_backtrace<Exception>(
|
|
fmt::format("linearizing_input_stream::check_size() - not enough bytes (requested {:d}, got {:d})", size, _size));
|
|
}
|
|
}
|
|
|
|
std::pair<bytes_view, bool> do_read(size_t size) {
|
|
check_size(size);
|
|
|
|
if (size <= _current.size()) {
|
|
bytes_view ret(_current.begin(), size);
|
|
remove_current_prefix(size);
|
|
return {ret, false};
|
|
}
|
|
|
|
auto out = _linearized_values.emplace_back(bytes::initialized_later{}, size).begin();
|
|
while (size) {
|
|
out = std::copy_n(_current.begin(), std::min(size, _current.size()), out);
|
|
size -= remove_current_prefix(size);
|
|
}
|
|
|
|
return {_linearized_values.back(), true};
|
|
}
|
|
|
|
public:
|
|
explicit linearizing_input_stream(const T& fr)
|
|
: _it(fr.begin())
|
|
, _end(fr.end())
|
|
, _current(*_it)
|
|
, _size(fr.size_bytes()) {
|
|
}
|
|
// Not cheap to copy, would copy all linearized values.
|
|
linearizing_input_stream(const linearizing_input_stream&) = delete;
|
|
|
|
size_t size() const {
|
|
return _size;
|
|
}
|
|
|
|
bool empty() const {
|
|
return _size == 0;
|
|
}
|
|
|
|
// The returned view is only valid as long as the stream is alive.
|
|
bytes_view read(size_t size) {
|
|
return do_read(size).first;
|
|
}
|
|
|
|
template <typename Type>
|
|
GCC6_CONCEPT(
|
|
requires std::is_trivial_v<Type>
|
|
)
|
|
Type read_trivial() {
|
|
auto [bv, linearized] = do_read(sizeof(Type));
|
|
auto ret = net::ntoh(*reinterpret_cast<const net::packed<Type>*>(bv.begin()));
|
|
if (linearized) {
|
|
_linearized_values.pop_back();
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
void skip(size_t size) {
|
|
check_size(size);
|
|
while (size) {
|
|
size -= remove_current_prefix(size);
|
|
}
|
|
}
|
|
};
|
|
|
|
} // namespace utils
|