Compare commits

...

3 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
91aab869b8 Fix exception name and remove unnecessary template keyword
- Use correct exception type: bufsize_mismatch_exception instead of bufsize_mismatch_error
- Remove unnecessary template keyword for read<pos_type>()
- Match parameter order with existing pattern (actual, expected)

Co-authored-by: mykaul <4655593+mykaul@users.noreply.github.com>
2025-11-06 12:01:01 +00:00
copilot-swe-agent[bot]
168e0a40e3 Fix oversized allocation in sstables::parse by using fragmented buffer
This addresses an issue where reading summary positions could cause
large contiguous memory allocations (249856 bytes reported).
Added read_exactly_fragmented() method to random_access_reader to
support reading into fragmented buffers, avoiding oversized allocations.

Co-authored-by: mykaul <4655593+mykaul@users.noreply.github.com>
2025-11-06 11:57:10 +00:00
copilot-swe-agent[bot]
ac54f21504 Initial plan 2025-11-06 11:50:13 +00:00
3 changed files with 29 additions and 5 deletions

View File

@@ -11,6 +11,7 @@
#include "sstables/random_access_reader.hh"
#include "utils/disk-error-handler.hh"
#include "utils/log.hh"
#include "utils/fragmented_temporary_buffer.hh"
namespace sstables {
@@ -24,6 +25,15 @@ future <temporary_buffer<char>> random_access_reader::read_exactly(size_t n) noe
}
}
// Reads `n` bytes from the underlying input stream into a
// fragmented_temporary_buffer, so that a large read does not require one
// contiguous allocation of `n` bytes.
//
// Returns a future resolving to the fragmented buffer. The function is
// noexcept: any exception thrown while setting up the read is converted into
// a failed future instead of propagating to the caller.
//
// NOTE(review): whether a short read (EOF before `n` bytes) yields a shorter
// buffer or an error is decided by fragmented_temporary_buffer::reader and
// must be checked by the caller — confirm against that helper.
future<fragmented_temporary_buffer> random_access_reader::read_exactly_fragmented(size_t n) noexcept {
    try {
        // The reader has to outlive the asynchronous read. A stack-allocated
        // reader would be destroyed as soon as this function returns, while
        // the returned future may still be pending and (presumably) still
        // using the reader's internal state. Keep it on the heap and release
        // it only once the future has resolved, successfully or not.
        auto reader = std::make_unique<fragmented_temporary_buffer::reader>();
        auto& reader_ref = *reader;
        return reader_ref.read_exactly(*_in, n).finally([reader = std::move(reader)] {});
    } catch (...) {
        return current_exception_as_future<fragmented_temporary_buffer>();
    }
}
static future<> close_if_needed(std::unique_ptr<input_stream<char>> in) {
if (!in) {
return make_ready_future<>();

View File

@@ -17,6 +17,7 @@
#include <seastar/core/iostream.hh>
#include <seastar/core/temporary_buffer.hh>
#include "seastarx.hh"
#include "utils/fragmented_temporary_buffer.hh"
namespace sstables {
@@ -33,6 +34,8 @@ protected:
public:
future <temporary_buffer<char>> read_exactly(size_t n) noexcept;
future<fragmented_temporary_buffer> read_exactly_fragmented(size_t n) noexcept;
future<> seek(uint64_t pos) noexcept;
bool eof() const noexcept { return _in->eof(); }

View File

@@ -36,6 +36,7 @@
#include "utils/error_injection.hh"
#include "utils/to_string.hh"
#include "utils/fragmented_temporary_buffer.hh"
#include "data_dictionary/storage_options.hh"
#include "dht/sharder.hh"
#include "writer.hh"
@@ -518,16 +519,26 @@ future<> parse(const schema& schema, sstable_version_types v, random_access_read
s.header.memory_size,
s.header.sampling_level,
s.header.size_at_full_sampling);
auto buf = co_await in.read_exactly(s.header.size * sizeof(pos_type));
auto len = s.header.size * sizeof(pos_type);
check_buf_size(buf, len);
// Use fragmented buffer to avoid large contiguous allocations
auto frag_buf = co_await in.read_exactly_fragmented(len);
if (frag_buf.empty()) {
throw bufsize_mismatch_exception(0, len);
}
if (frag_buf.size_bytes() != len) {
throw bufsize_mismatch_exception(frag_buf.size_bytes(), len);
}
// Positions are encoded in little-endian.
auto b = buf.get();
auto stream = frag_buf.get_istream();
s.positions.reserve(s.header.size + 1);
while (s.positions.size() != s.header.size) {
s.positions.push_back(seastar::read_le<pos_type>(b));
b += sizeof(pos_type);
auto pos_result = stream.read<pos_type>();
if (!pos_result) {
std::rethrow_exception(pos_result.assume_error());
}
s.positions.push_back(seastar::le_to_cpu(*pos_result));
co_await coroutine::maybe_yield();
}
// Since the keys in the index are not sized, we need to calculate