Reduce logalloc differences between debug and release

A lot of code in scylla is only reachable if SEASTAR_DEFAULT_ALLOCATOR
is not defined. In particular, refill_emergency_reserve in the default
allocator case is empty, but in the seastar allocator case it compacts
segments.
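
Schematically (a stand-in illustration, not the actual pre-patch source;
segment_pool and compact_segments here are placeholders):

    struct segment_pool {
        void compact_segments() {}   // placeholder for the real compaction path
        // Pre-patch shape: debug builds compiled the body out entirely.
        void refill_emergency_reserve() {
    #ifndef SEASTAR_DEFAULT_ALLOCATOR
            compact_segments();      // seastar allocator: does real work
    #endif                           // default allocator: no-op
        }
    };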

I am trying to debug a crash that seems to involve memory corruption
around the lsa allocator, and being able to use a debug build for that
would be awesome.

This patch reduces the differences between the two cases by having a
common segment_pool that defers only a few operations to different
segment_store implementations.
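
The resulting shape, condensed from the diff below (signatures only, bodies
elided; not a verbatim excerpt):

    struct segment;                  // real definition earlier in logalloc.cc

    class segment_store {            // two implementations, one per #ifdef branch
    public:
        size_t non_lsa_reserve = 0;
        segment* segment_from_idx(size_t idx) const;
        size_t idx_from_segment(segment* seg);
        size_t new_idx_for_segment(segment* seg);
        void free_segment(segment* seg);
        size_t max_segments() const;
        bool can_allocate_more_segments();
    };

    class segment_pool {             // now a single implementation for both builds
        segment_store _store;        // the only per-allocator piece
        // descriptors, bitmaps and reserve handling are identical in both builds
    };

In the default-allocator build, index 0 is reserved as a "no segment" marker,
so the shared pool can keep its index-based bookkeeping unchanged.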

Tests: unit (debug, dev)

Signed-off-by: Rafael Ávila de Espíndola <espindola@scylladb.com>
Message-Id: <20190606020937.118205-1-espindola@scylladb.com>
commit b3adabda2d
parent 95bab04cf9
Author: Rafael Ávila de Espíndola <espindola@scylladb.com>
Date:   2019-06-05 19:09:37 -07:00
Committer: Avi Kivity

@@ -427,16 +427,14 @@ struct alignas(segment_size) segment {
         return reinterpret_cast<T*>(data + offset);
     }
-    bool is_empty() const;
+    bool is_empty();
     void record_alloc(size_type size);
     void record_free(size_type size);
-    occupancy_stats occupancy() const;
+    occupancy_stats occupancy();
-#ifndef SEASTAR_DEFAULT_ALLOCATOR
     static void* operator new(size_t size) = delete;
     static void* operator new(size_t, void* ptr) noexcept { return ptr; }
     static void operator delete(void* ptr) = delete;
-#endif
 };

 static constexpr size_t max_managed_object_size = segment_size * 0.1;
@@ -480,6 +478,91 @@ struct segment_descriptor : public log_heap_hook<segment_descriptor_hist_options> {
 using segment_descriptor_hist = log_heap<segment_descriptor, segment_descriptor_hist_options>;

 #ifndef SEASTAR_DEFAULT_ALLOCATOR
+class segment_store {
+    memory::memory_layout _layout;
+    uintptr_t _segments_base; // The address of the first segment
+public:
+    size_t non_lsa_reserve = 0;
+    segment_store()
+        : _layout(memory::get_memory_layout())
+        , _segments_base(align_down(_layout.start, (uintptr_t)segment::size)) {
+    }
+    segment* segment_from_idx(size_t idx) const {
+        return reinterpret_cast<segment*>(_segments_base) + idx;
+    }
+    size_t idx_from_segment(segment* seg) const {
+        return seg - reinterpret_cast<segment*>(_segments_base);
+    }
+    size_t new_idx_for_segment(segment* seg) {
+        return idx_from_segment(seg);
+    }
+    void free_segment(segment *seg) { }
+    size_t max_segments() const {
+        return (_layout.end - _segments_base) / segment::size;
+    }
+    bool can_allocate_more_segments() {
+        return memory::stats().free_memory() >= non_lsa_reserve + segment::size;
+    }
+};
+#else
+class segment_store {
+    std::vector<segment*> _segments;
+    std::unordered_map<segment*, size_t> _segment_indexes;
+    static constexpr size_t _std_memory_available = size_t(1) << 30; // emulate 1GB per shard
+    std::vector<segment*>::iterator find_empty() {
+        // segment 0 is a marker for no segment
+        return std::find(_segments.begin() + 1, _segments.end(), nullptr);
+    }
+public:
+    size_t non_lsa_reserve = 0;
+    segment_store() : _segments(max_segments()) {
+        _segment_indexes.reserve(max_segments());
+    }
+    segment* segment_from_idx(size_t idx) const {
+        assert(idx < _segments.size());
+        return _segments[idx];
+    }
+    size_t idx_from_segment(segment* seg) {
+        // segment 0 is a marker for no segment
+        auto i = _segment_indexes.find(seg);
+        if (i == _segment_indexes.end()) {
+            return 0;
+        }
+        return i->second;
+    }
+    size_t new_idx_for_segment(segment* seg) {
+        auto i = find_empty();
+        assert(i != _segments.end());
+        *i = seg;
+        size_t ret = i - _segments.begin();
+        _segment_indexes[seg] = ret;
+        return ret;
+    }
+    void free_segment(segment *seg) {
+        size_t i = idx_from_segment(seg);
+        assert(i != 0);
+        _segment_indexes.erase(seg);
+        _segments[i] = nullptr;
+    }
+    ~segment_store() {
+        for (segment *seg : _segments) {
+            if (seg) {
+                seg->~segment();
+                free(seg);
+            }
+        }
+    }
+    size_t max_segments() const {
+        return _std_memory_available / segment::size;
+    }
+    bool can_allocate_more_segments() {
+        auto i = find_empty();
+        return i != _segments.end();
+    }
+};
+#endif

 // Segment pool implementation for the seastar allocator.
 // Stores segment descriptors in a vector which is indexed using most significant
@@ -488,8 +571,7 @@ using segment_descriptor_hist = log_heap<segment_descriptor, segment_descriptor_hist_options>;
 // We prefer using high-address segments, and returning low-address segments to the seastar
 // allocator in order to segregate lsa and non-lsa memory, to reduce fragmentation.
 class segment_pool {
-    memory::memory_layout _layout;
-    uintptr_t _segments_base; // The address of the first segment
+    segment_store _store;
     std::vector<segment_descriptor> _segments;
     size_t _segments_in_use{};
     utils::dynamic_bitset _lsa_owned_segments_bitmap; // owned by this
@@ -499,7 +581,6 @@ class segment_pool {
     size_t _emergency_reserve_max = 30;
     bool _allocation_failure_flag = false;
     size_t _non_lsa_memory_in_use = 0;
-    size_t _non_lsa_reserve = 0;
     // Invariants - a segment is in one of the following states:
     //   In use by some region
     //     - set in _lsa_owned_segments_bitmap
@@ -520,24 +601,24 @@ private:
     segment* allocate_or_fallback_to_reserve();
     void free_or_restore_to_reserve(segment* seg) noexcept;
     segment* segment_from_idx(size_t idx) const {
-        return reinterpret_cast<segment*>(_segments_base) + idx;
+        return _store.segment_from_idx(idx);
     }
-    size_t idx_from_segment(segment* seg) const {
-        return seg - reinterpret_cast<segment*>(_segments_base);
+    size_t idx_from_segment(segment* seg) {
+        return _store.idx_from_segment(seg);
     }
     size_t max_segments() const {
-        return (_layout.end - _segments_base) / segment::size;
+        return _store.max_segments();
     }
-    bool can_allocate_more_memory(size_t size) {
-        return memory::stats().free_memory() >= _non_lsa_reserve + size;
+    bool can_allocate_more_segments() {
+        return _store.can_allocate_more_segments();
     }
 public:
     segment_pool();
     void prime(size_t available_memory, size_t min_free_memory);
     segment* new_segment(region::impl* r);
-    segment_descriptor& descriptor(const segment*);
+    segment_descriptor& descriptor(segment*);
     // Returns segment containing given object or nullptr.
-    segment* containing_segment(const void* obj) const;
+    segment* containing_segment(const void* obj);
     segment* segment_from(const segment_descriptor& desc);
     void free_segment(segment*) noexcept;
     void free_segment(segment*, segment_descriptor&) noexcept;
@@ -559,7 +640,7 @@ public:
         return _non_lsa_memory_in_use + _segments_in_use * segment::size;
     }
     struct reservation_goal;
-    void set_region(const segment* seg, region::impl* r) {
+    void set_region(segment* seg, region::impl* r) {
         set_region(descriptor(seg), r);
     }
     void set_region(segment_descriptor& desc, region::impl* r) {
@@ -632,6 +713,7 @@ size_t segment_pool::reclaim_segments(size_t target) {
         }
         _lsa_free_segments_bitmap.clear(src_idx);
         _lsa_owned_segments_bitmap.clear(src_idx);
+        _store.free_segment(src);
         src->~segment();
         ::free(src);
         ++reclaimed_segments;
@@ -666,14 +748,14 @@ segment* segment_pool::allocate_segment(size_t reserve)
             --_free_segments;
             return seg;
         }
-        if (can_allocate_more_memory(segment::size)) {
+        if (can_allocate_more_segments()) {
             memory::disable_abort_on_alloc_failure_temporarily dfg;
             auto p = aligned_alloc(segment::size, segment::size);
             if (!p) {
                 continue;
             }
             auto seg = new (p) segment;
-            auto idx = idx_from_segment(seg);
+            auto idx = _store.new_idx_for_segment(seg);
             _lsa_owned_segments_bitmap.set(idx);
             return seg;
         }
@@ -700,20 +782,20 @@ void segment_pool::refill_emergency_reserve() {
 }

 segment_descriptor&
-segment_pool::descriptor(const segment* seg) {
-    uintptr_t seg_addr = reinterpret_cast<uintptr_t>(seg);
-    uintptr_t index = (seg_addr - _segments_base) >> segment::size_shift;
+segment_pool::descriptor(segment* seg) {
+    uintptr_t index = idx_from_segment(seg);
     return _segments[index];
 }

 segment*
-segment_pool::containing_segment(const void* obj) const {
+segment_pool::containing_segment(const void* obj) {
     auto addr = reinterpret_cast<uintptr_t>(obj);
     auto offset = addr & (segment::size - 1);
-    auto index = (addr - _segments_base) >> segment::size_shift;
+    auto seg = reinterpret_cast<segment*>(addr - offset);
+    auto index = idx_from_segment(seg);
     auto& desc = _segments[index];
     if (desc._region) {
-        return reinterpret_cast<segment*>(addr - offset);
+        return seg;
     } else {
         return nullptr;
     }
@@ -723,7 +805,7 @@ segment*
 segment_pool::segment_from(const segment_descriptor& desc) {
     assert(desc._region);
     auto index = &desc - &_segments[0];
-    return reinterpret_cast<segment*>(_segments_base + (index << segment::size_shift));
+    return segment_from_idx(index);
 }

 segment*
@@ -758,9 +840,7 @@ void segment_pool::free_segment(segment* seg, segment_descriptor& desc) noexcept
 }

 segment_pool::segment_pool()
-    : _layout(memory::get_memory_layout())
-    , _segments_base(align_down(_layout.start, (uintptr_t)segment::size))
-    , _segments(max_segments())
+    : _segments(max_segments())
     , _lsa_owned_segments_bitmap(max_segments())
     , _lsa_free_segments_bitmap(max_segments())
 {
@@ -771,7 +851,7 @@ void segment_pool::prime(size_t available_memory, size_t min_free_memory) {
     try {
         // Allocate all of memory so that we occupy the top part. Afterwards, we'll start
         // freeing from the bottom.
-        _non_lsa_reserve = 0;
+        _store.non_lsa_reserve = 0;
         refill_emergency_reserve();
     } catch (std::bad_alloc&) {
         _emergency_reserve_max = old_emergency_reserve;
@@ -781,149 +861,11 @@ void segment_pool::prime(size_t available_memory, size_t min_free_memory) {
     size_t min_gap = 1 * 1024 * 1024;
     size_t max_gap = 64 * 1024 * 1024;
     size_t gap = std::min(max_gap, std::max(available_memory / 16, min_gap));
-    _non_lsa_reserve = min_free_memory + gap;
+    _store.non_lsa_reserve = min_free_memory + gap;
     // Since the reclaimer is not yet in place, free some low memory for general use
-    reclaim_segments(_non_lsa_reserve / segment::size);
+    reclaim_segments(_store.non_lsa_reserve / segment::size);
 }

-#else
-
-// Segment pool version for the standard allocator. Slightly less efficient
-// than the version for seastar's allocator.
-class segment_pool {
-    class segment_deleter {
-        segment_pool* _pool;
-    public:
-        explicit segment_deleter(segment_pool* pool) : _pool(pool) {}
-        void operator()(segment* seg) const noexcept {
-            if (seg) {
-                delete seg;
-                _pool->_std_memory_available += segment::size;
-            }
-        }
-    };
-    std::unordered_map<const segment*, segment_descriptor> _segments;
-    std::unordered_map<const segment_descriptor*, segment*> _segment_descs;
-    std::stack<std::unique_ptr<segment, segment_deleter>> _free_segments;
-    size_t _segments_in_use{};
-    size_t _non_lsa_memory_in_use = 0;
-    size_t _std_memory_available = size_t(1) << 30; // emulate 1GB per shard
-    friend segment_deleter;
-public:
-    void prime(size_t available_memory, size_t min_free_memory) {}
-    segment* new_segment(region::impl* r) {
-        if (_free_segments.empty()) {
-            if (_std_memory_available < segment::size) {
-                throw std::bad_alloc();
-            }
-            std::unique_ptr<segment, segment_deleter> seg{new segment, segment_deleter(this)};
-            _std_memory_available -= segment::size;
-            _free_segments.push(std::move(seg));
-        }
-        ++_segments_in_use;
-        auto seg = _free_segments.top().release();
-        _free_segments.pop();
-        assert((reinterpret_cast<uintptr_t>(seg) & (sizeof(segment) - 1)) == 0);
-        segment_descriptor& desc = _segments[seg];
-        desc._free_space = segment::size;
-        desc._region = r;
-        _segment_descs[&desc] = seg;
-        return seg;
-    }
-    segment_descriptor& descriptor(const segment* seg) {
-        auto i = _segments.find(seg);
-        if (i != _segments.end()) {
-            return i->second;
-        } else {
-            segment_descriptor& desc = _segments[seg];
-            desc._region = nullptr;
-            return desc;
-        }
-    }
-    segment* segment_from(segment_descriptor& desc) {
-        auto i = _segment_descs.find(&desc);
-        assert(i != _segment_descs.end());
-        return i->second;
-    }
-    void free_segment(segment* seg, segment_descriptor& desc) {
-        free_segment(seg);
-    }
-    void free_segment(segment* seg) {
-        --_segments_in_use;
-        auto i = _segments.find(seg);
-        assert(i != _segments.end());
-        _segment_descs.erase(&i->second);
-        _segments.erase(i);
-        std::unique_ptr<segment, segment_deleter> useg{seg, segment_deleter(this)};
-        _free_segments.push(std::move(useg));
-    }
-    segment* containing_segment(const void* obj) const {
-        uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
-        auto seg = reinterpret_cast<segment*>(align_down(addr, static_cast<uintptr_t>(segment::size)));
-        auto i = _segments.find(seg);
-        if (i == _segments.end()) {
-            return nullptr;
-        }
-        return seg;
-    }
-    size_t segments_in_use() const;
-    size_t current_emergency_reserve_goal() const { return 0; }
-    void set_current_emergency_reserve_goal(size_t goal) { }
-    void set_emergency_reserve_max(size_t new_size) { }
-    size_t emergency_reserve_max() { return 0; }
-    void clear_allocation_failure_flag() { }
-    bool allocation_failure_flag() { return false; }
-    void refill_emergency_reserve() {}
-    void update_non_lsa_memory_in_use(ssize_t n) {
-        _non_lsa_memory_in_use += n;
-    }
-    size_t non_lsa_memory_in_use() const {
-        return _non_lsa_memory_in_use;
-    }
-    size_t total_memory_in_use() const {
-        return _non_lsa_memory_in_use + _segments_in_use * segment::size;
-    }
-    size_t unreserved_free_segments() const {
-        return 0;
-    }
-    void set_region(const segment* seg, region::impl* r) {
-        set_region(descriptor(seg), r);
-    }
-    void set_region(segment_descriptor& desc, region::impl* r) {
-        desc._region = r;
-    }
-    size_t reclaim_segments(size_t target) {
-        size_t reclaimed = 0;
-        while (reclaimed < target && !_free_segments.empty()) {
-            _free_segments.pop();
-            ++reclaimed;
-        }
-        return reclaimed;
-    }
-    void reclaim_all_free_segments() {
-        reclaim_segments(std::numeric_limits<size_t>::max());
-    }
-    struct stats {
-        size_t segments_migrated;
-        size_t segments_compacted;
-        uint64_t memory_allocated;
-        uint64_t memory_compacted;
-    };
-private:
-    stats _stats{};
-public:
-    const stats& statistics() const { return _stats; }
-    void on_segment_migration() { _stats.segments_migrated++; }
-    void on_segment_compaction(size_t used_space);
-    void on_memory_allocation(size_t size);
-    size_t free_segments() const { return 0; }
-public:
-    class reservation_goal;
-};
-#endif

 void segment_pool::on_segment_compaction(size_t used_size) {
     _stats.segments_compacted++;
     _stats.memory_compacted += used_size;
@@ -961,12 +903,12 @@ void segment::record_free(segment::size_type size) {
     shard_segment_pool.descriptor(this).record_free(size);
 }

-bool segment::is_empty() const {
+bool segment::is_empty() {
     return shard_segment_pool.descriptor(this).is_empty();
 }

 occupancy_stats
-segment::occupancy() const {
+segment::occupancy() {
     return { shard_segment_pool.descriptor(this)._free_space, segment::size };
 }
@@ -1779,8 +1721,12 @@ occupancy_stats tracker::impl::occupancy() {
 }

 size_t tracker::impl::non_lsa_used_space() {
+#ifdef SEASTAR_DEFAULT_ALLOCATOR
+    return 0;
+#else
     auto free_space_in_lsa = shard_segment_pool.free_segments() * segment_size;
     return memory::stats().allocated_memory() - region_occupancy().total_space() - free_space_in_lsa;
+#endif
 }

 void tracker::impl::reclaim_all_free_segments()
@@ -2035,8 +1981,6 @@ size_t tracker::impl::compact_and_evict_locked(size_t reserve_segments, size_t memory_to_release) {
     return mem_released;
 }

-#ifndef SEASTAR_DEFAULT_ALLOCATOR
-
 bool segment_pool::migrate_segment(segment* src, segment* dst)
 {
     auto& src_desc = descriptor(src);
@@ -2062,8 +2006,6 @@ bool segment_pool::migrate_segment(segment* src, segment* dst)
     return true;
 }

-#endif
-
 void tracker::impl::register_region(region::impl* r) {
     reclaiming_lock _(*this);
     _regions.push_back(r);
@@ -2259,8 +2201,6 @@ allocating_section::guard::~guard() {
     shard_segment_pool.set_emergency_reserve_max(_prev);
 }

-#ifndef SEASTAR_DEFAULT_ALLOCATOR
-
 void allocating_section::reserve() {
     try {
         shard_segment_pool.set_emergency_reserve_max(std::max(_lsa_reserve, _minimum_lsa_emergency_reserve));
@@ -2298,17 +2238,6 @@ void allocating_section::on_alloc_failure(logalloc::region& r) {
     reserve();
 }

-#else
-
-void allocating_section::reserve() {
-}
-
-void allocating_section::on_alloc_failure(logalloc::region&) {
-    throw std::bad_alloc();
-}
-
-#endif
-
 void allocating_section::set_lsa_reserve(size_t reserve) {
     _lsa_reserve = reserve;
 }
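
A note on what the shared pool now relies on (a hypothetical check, not part
of the patch): both segment_store implementations must keep index assignment
and lookup consistent.

    #include <cassert>
    #include <cstddef>

    // Hypothetical helper, not in the patch: a newly assigned index must
    // round-trip through both lookup directions in either segment_store.
    template <typename Store, typename Segment>
    void check_index_roundtrip(Store& store, Segment* seg) {
        std::size_t idx = store.new_idx_for_segment(seg);
        assert(store.idx_from_segment(seg) == idx);  // lookup agrees with assignment
        assert(store.segment_from_idx(idx) == seg);  // index maps back to the segment
    }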