Files
scylladb/utils/managed_string.hh
Dawid Mędrek 9cc3d49233 utils/managed_string.hh: Introduce managed_string and fragmented_ostringstream
Currently, we use `managed_bytes` to represent fragmented sequences of bytes.
In some cases, the type corresponds to generic bytes, while in some other cases
-- to strings of actual text. Because of that, it's very easy to get confused
about what purpose `managed_bytes` serves in a specific piece of code. We should avoid that.

In this commit, we're introducing basic wrappers over `managed_bytes` and
`bytes_ostream` with a promise that they represent UTF-8-encoded strings.
The interfaces of those types are pretty basic, but they should be sufficient
for the most common use: filling a stream with characters and then extracting
a fragmented buffer from it.
2025-06-30 19:12:08 +02:00

134 lines
3.7 KiB
C++

/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include "utils/fragment_range.hh"
#include "utils/managed_bytes.hh"
#include "bytes_ostream.hh"
// Fragmented string type: a thin wrapper over `managed_bytes` whose contents
// are promised to be UTF-8 encoded text rather than arbitrary bytes.
//
// The type is move-only: the move operations are user-declared, which
// suppresses the implicit copy operations.
class managed_string {
    managed_bytes _impl;

    // Adopts `mb` as-is. Kept private so that unchecked construction has to go
    // through the explicitly named from_managed_bytes_unsafe().
    managed_string(managed_bytes mb) : _impl(std::move(mb)) {}

public:
    // Builds the underlying buffer from the bytes of `sv`.
    // The bytes are handed over unchanged; nothing in this constructor inspects them.
    managed_string(std::string_view sv)
        : _impl(bytes_view(reinterpret_cast<const int8_t*>(sv.data()), sv.size()))
    {}

    managed_string(managed_string&&) noexcept = default;
    managed_string& operator=(managed_string&&) noexcept = default;

    // Wraps an existing `managed_bytes` without checking its contents.
    // Precondition: the passed argument must represent a valid UTF-8 string.
    static managed_string from_managed_bytes_unsafe(managed_bytes mb) {
        return managed_string(std::move(mb));
    }

    // Equality is the defaulted member-wise comparison of the wrapped buffer.
    bool operator==(const managed_string&) const = default;

    // Ordering is delegated to compare_unsigned() applied to views of both buffers.
    std::strong_ordering operator<=>(const managed_string& other) const {
        return compare_unsigned(managed_bytes_view(_impl), managed_bytes_view(other._impl));
    }

    // Gives access to the wrapped `managed_bytes`, preserving the constness and
    // value category of the object the function is called on.
    template <typename Self>
    decltype(auto) as_managed_bytes(this Self&& self) {
        return std::forward_like<Self>(self._impl);
    }

    // Returns the whole string as a single contiguous `sstring`, produced by
    // copying the fragments one after another.
    sstring linearize() const {
        sstring flat(sstring::initialized_later{}, _impl.size());
        auto dst = flat.begin();
        for (auto&& piece : fragment_range(managed_bytes_view(_impl))) {
            std::string_view chars = to_string_view(piece);
            // copy() reports where it stopped writing; continue from there for the next fragment.
            dst = std::ranges::copy(chars, dst).out;
        }
        return flat;
    }
};
// fmt integration for managed_string.
// parse() is inherited from the string_view formatter; format() writes the
// fragments back to back with a plain "{}" and does not consult any parsed
// format specification.
template <> struct fmt::formatter<managed_string> : fmt::formatter<string_view> {
    template <typename FormatContext>
    auto format(const managed_string& b, FormatContext& ctx) const {
        auto sink = ctx.out();
        for (auto&& piece : fragment_range(managed_bytes_view(b.as_managed_bytes()))) {
            std::string_view text = to_string_view(piece);
            sink = fmt::format_to(sink, "{}", text);
        }
        return sink;
    }
};
// Stream insertion for managed_string: prints `b` to `os` through fmt using the
// "{}" placeholder and returns `os` so that insertions can be chained.
inline std::ostream& operator<<(std::ostream& os, const managed_string& b) {
fmt::print(os, "{}", b);
return os;
}
// A thin wrapper over `bytes_ostream` with a promise that it corresponds
// to actual UTF-8 characters, not just generic bytes.
//
// Typical use: append characters/strings with write() or operator<<, then
// call to_managed_string() on an rvalue stream to extract the result.
class fragmented_ostringstream {
private:
    bytes_ostream _impl;

public:
    // Minimal output-iterator-style adaptor over the stream.
    // Assigning a `char` appends it to the stream; dereference and both
    // increment forms do nothing and return the adaptor itself, so the usual
    // `*it++ = c` idiom works.
    struct iter {
        fragmented_ostringstream& stream;

        iter& operator=(char c) {
            stream.write(c);
            return *this;
        }
        iter& operator*() { return *this; }
        iter& operator++() { return *this; }
        iter& operator++(int) { return *this; }
    };

public:
    // Appends the characters of `sv` to the stream.
    [[gnu::always_inline]]
    void write(std::string_view sv) {
        _impl.write(sv.data(), sv.size());
    }

    // Appends a single character to the stream.
    [[gnu::always_inline]]
    void write(char c) {
        _impl.write(bytes_view(reinterpret_cast<typename bytes_ostream::value_type*>(&c), 1));
    }

    // Appends the characters of `sv` and returns the stream for chaining.
    //
    // The parameter is a `std::string_view` (not a `std::string` by value) so that:
    //  - string literals and `std::string` arguments are not copied into a
    //    temporary string just to be forwarded to write();
    //  - `std::string_view` arguments bind here directly instead of being
    //    implicitly converted to a temporary `managed_string` and routed to
    //    the `const managed_string&` overload.
    fragmented_ostringstream& operator<<(std::string_view sv) {
        write(sv);
        return *this;
    }

    // Appends a single character and returns the stream for chaining.
    fragmented_ostringstream& operator<<(char c) {
        write(c);
        return *this;
    }

    // Appends the contents of `ms` fragment by fragment (no linearization)
    // and returns the stream for chaining.
    fragmented_ostringstream& operator<<(const managed_string& ms) {
        for (auto&& fragment : fragment_range(managed_bytes_view(ms.as_managed_bytes()))) {
            _impl.write(fragment);
        }
        return *this;
    }

    // Returns an `iter` adaptor that appends to this stream.
    // The adaptor stores a reference to the stream, so it must not outlive it.
    iter to_iter() noexcept {
        return iter {*this};
    }

    // Consumes the stream (rvalue-only) and returns its contents as a `managed_string`.
    // No check of the contents is made here; the UTF-8 promise rests on what was written.
    managed_string to_managed_string() && {
        return managed_string::from_managed_bytes_unsafe(std::move(_impl).to_managed_bytes());
    }
};