Be more conservative when deciding when to shut down due to disk errors
Currently we only shut down on EIO. Expand this to shut down on any system_error. This may cause us to shut down prematurely due to a transient error, but this is better than not shutting down due to a permanent error (such as ENOSPC or EPERM). We may whitelist certain errors in the future to improve the behavior. Fixes #1311. Message-Id: <1465136956-1352-1-git-send-email-avi@scylladb.com>
This commit is contained in:
committed by
Tomasz Grabiec
parent
17b56eb459
commit
961e80ab74
Notes:
Pekka Enberg
2016-06-06 16:15:25 +03:00
backport: 1.2
@@ -36,6 +36,8 @@ extern thread_local disk_error_signal_type sstable_read_error;
|
||||
extern thread_local disk_error_signal_type sstable_write_error;
|
||||
extern thread_local disk_error_signal_type general_disk_error;
|
||||
|
||||
bool should_stop_on_system_error(const std::system_error& e);
|
||||
|
||||
template<typename Func, typename... Args>
|
||||
std::enable_if_t<!is_future<std::result_of_t<Func(Args&&...)>>::value,
|
||||
std::result_of_t<Func(Args&&...)>>
|
||||
@@ -44,7 +46,7 @@ do_io_check(disk_error_signal_type& signal, Func&& func, Args&&... args) {
|
||||
// calling function
|
||||
return func(std::forward<Args>(args)...);
|
||||
} catch (std::system_error& e) {
|
||||
if (is_system_error_errno(EIO)) {
|
||||
if (should_stop_on_system_error(e)) {
|
||||
signal();
|
||||
throw storage_io_error(e);
|
||||
}
|
||||
@@ -62,7 +64,7 @@ auto do_io_check(disk_error_signal_type& signal, Func&& func, Args&&... args) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (std::system_error& sys_err) {
|
||||
if (is_system_error_errno(EIO)) {
|
||||
if (should_stop_on_system_error(sys_err)) {
|
||||
signal();
|
||||
throw storage_io_error(sys_err);
|
||||
}
|
||||
@@ -70,7 +72,7 @@ auto do_io_check(disk_error_signal_type& signal, Func&& func, Args&&... args) {
|
||||
return futurize<std::result_of_t<Func(Args&&...)>>::make_exception_future(ep);
|
||||
});
|
||||
} catch (std::system_error& e) {
|
||||
if (is_system_error_errno(EIO)) {
|
||||
if (should_stop_on_system_error(e)) {
|
||||
signal();
|
||||
throw storage_io_error(e);
|
||||
}
|
||||
|
||||
@@ -49,3 +49,9 @@ bool is_system_error_errno(int err_no)
|
||||
code.category() == std::system_category();
|
||||
});
|
||||
}
|
||||
|
||||
bool should_stop_on_system_error(const std::system_error& e) {
|
||||
// We may whitelist transient errors in the future, but for now,
|
||||
// be conservative.
|
||||
return true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user