commitlog: replay: Respect back-pressure from memtable space to prevent OOM

Commit log replay was bypassing memtable space back-pressure, and if
replay was faster than memtable flush, it could lead to OOM.

The fix is to call database::apply_in_memory() instead of
table::apply(). The former blocks when memtable space is full.

Fixes #4982.

Tests:
  - unit (release)
  - manual, replay with memtable flush failing and without failing

Message-Id: <1568381952-26256-1-git-send-email-tgrabiec@scylladb.com>
This commit is contained in:
Tomasz Grabiec
2019-09-13 15:39:12 +02:00
committed by Avi Kivity
parent 3c49b2960b
commit 79935df959
2 changed files with 14 additions and 8 deletions

View File

@@ -1301,6 +1301,7 @@ private:
bool _supports_infinite_bound_range_deletions = false;
future<> init_commitlog();
public:
future<> apply_in_memory(const frozen_mutation& m, schema_ptr m_schema, db::rp_handle&&, db::timeout_clock::time_point timeout);
future<> apply_in_memory(const mutation& m, column_family& cf, db::rp_handle&&, db::timeout_clock::time_point timeout);
private:

View File

@@ -301,16 +301,21 @@ future<> db::commitlog_replayer::impl::process(stats* s, fragmented_temporary_bu
mutation m(cf.schema(), fm.decorated_key(*cf.schema()));
converting_mutation_partition_applier v(cm, *cf.schema(), m.partition());
fm.partition().accept(cm, v);
cf.apply(std::move(m));
return do_with(std::move(m), [&db, &cf] (mutation m) {
return db.apply_in_memory(m, cf, db::rp_handle(), db::no_timeout);
});
} else {
cf.apply(fm, cf.schema());
return db.apply_in_memory(fm, cf.schema(), db::rp_handle(), db::no_timeout);
}
}).then_wrapped([s] (future<> f) {
try {
f.get();
s->applied_mutations++;
} catch (...) {
s->invalid_mutations++;
// TODO: write mutation to file like origin.
rlogger.warn("error replaying: {}", std::current_exception());
}
s->applied_mutations++;
return make_ready_future<>();
}).handle_exception([s](auto ep) {
s->invalid_mutations++;
// TODO: write mutation to file like origin.
rlogger.warn("error replaying: {}", ep);
});
} catch (no_such_column_family&) {
// No such CF now? Origin just ignores this.