2076 lines
65 KiB
C++
2076 lines
65 KiB
C++
/*
|
|
* This file is open source software, licensed to you under the terms
|
|
* of the Apache License, Version 2.0 (the "License"). See the NOTICE file
|
|
* distributed with this work for additional information regarding copyright
|
|
* ownership. You may not use this file except in compliance with the License.
|
|
*
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing,
|
|
* software distributed under the License is distributed on an
|
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
* KIND, either express or implied. See the License for the
|
|
* specific language governing permissions and limitations
|
|
* under the License.
|
|
*/
|
|
/*
|
|
* Copyright 2014 Cloudius Systems
|
|
*/
|
|
|
|
#include <sys/syscall.h>
|
|
#include "task.hh"
|
|
#include "reactor.hh"
|
|
#include "memory.hh"
|
|
#include "core/posix.hh"
|
|
#include "net/packet.hh"
|
|
#include "resource.hh"
|
|
#include "print.hh"
|
|
#include "scollectd.hh"
|
|
#include "util/conversions.hh"
|
|
#include "core/future-util.hh"
|
|
#include "thread.hh"
|
|
#include <cassert>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <sys/eventfd.h>
|
|
#include <boost/thread/barrier.hpp>
|
|
#include <boost/algorithm/string/classification.hpp>
|
|
#include <boost/algorithm/string/split.hpp>
|
|
#include <boost/iterator/counting_iterator.hpp>
|
|
#include <atomic>
|
|
#include <dirent.h>
|
|
#ifdef HAVE_DPDK
|
|
#include <core/dpdk_rte.hh>
|
|
#include <rte_lcore.h>
|
|
#include <rte_launch.h>
|
|
#endif
|
|
#include "prefetch.hh"
|
|
#include <exception>
|
|
#include <regex>
|
|
#ifdef __GNUC__
|
|
#include <iostream>
|
|
#include <system_error>
|
|
#include <cxxabi.h>
|
|
#endif
|
|
|
|
#ifdef HAVE_OSV
|
|
#include <osv/newpoll.hh>
|
|
#endif
|
|
|
|
using namespace net;
|
|
|
|
std::atomic<lowres_clock::rep> lowres_clock::_now;
|
|
constexpr std::chrono::milliseconds lowres_clock::_granularity;
|
|
|
|
timespec to_timespec(clock_type::time_point t) {
|
|
using ns = std::chrono::nanoseconds;
|
|
auto n = std::chrono::duration_cast<ns>(t.time_since_epoch()).count();
|
|
return { n / 1'000'000'000, n % 1'000'000'000 };
|
|
}
|
|
|
|
lowres_clock::lowres_clock() {
|
|
_timer.set_callback([this] { update(); });
|
|
_timer.arm_periodic(_granularity);
|
|
}
|
|
|
|
void lowres_clock::update() {
|
|
auto ticks = _granularity.count();
|
|
_now.fetch_add(ticks, std::memory_order_relaxed);
|
|
}
|
|
|
|
template <typename T>
|
|
struct syscall_result {
|
|
T result;
|
|
int error;
|
|
void throw_if_error() {
|
|
if (long(result) == -1) {
|
|
throw std::system_error(error, std::system_category());
|
|
}
|
|
}
|
|
};
|
|
|
|
// Wrapper for a system call result containing the return value,
|
|
// an output parameter that was returned from the syscall, and errno.
|
|
template <typename Extra>
|
|
struct syscall_result_extra {
|
|
int result;
|
|
Extra extra;
|
|
int error;
|
|
void throw_if_error() {
|
|
if (result == -1) {
|
|
throw std::system_error(error, std::system_category());
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename T>
|
|
syscall_result<T>
|
|
wrap_syscall(T result) {
|
|
syscall_result<T> sr;
|
|
sr.result = result;
|
|
sr.error = errno;
|
|
return sr;
|
|
}
|
|
|
|
template <typename Extra>
|
|
syscall_result_extra<Extra>
|
|
wrap_syscall(int result, const Extra& extra) {
|
|
return {result, extra, errno};
|
|
}
|
|
|
|
reactor_backend_epoll::reactor_backend_epoll()
|
|
: _epollfd(file_desc::epoll_create(EPOLL_CLOEXEC)) {
|
|
}
|
|
|
|
reactor::signals::signals() : _pending_signals(0) {
|
|
}
|
|
|
|
reactor::signals::~signals() {
|
|
sigset_t mask;
|
|
sigfillset(&mask);
|
|
::sigprocmask(SIG_BLOCK, &mask, NULL);
|
|
}
|
|
|
|
reactor::signals::signal_handler::signal_handler(int signo, std::function<void ()>&& handler)
|
|
: _handler(std::move(handler)) {
|
|
auto mask = make_sigset_mask(signo);
|
|
auto r = ::sigprocmask(SIG_UNBLOCK, &mask, NULL);
|
|
throw_system_error_on(r == -1);
|
|
struct sigaction sa;
|
|
sa.sa_sigaction = action;
|
|
sa.sa_mask = make_empty_sigset_mask();
|
|
sa.sa_flags = SA_SIGINFO | SA_RESTART;
|
|
r = ::sigaction(signo, &sa, nullptr);
|
|
throw_system_error_on(r == -1);
|
|
}
|
|
|
|
void
|
|
reactor::signals::handle_signal(int signo, std::function<void ()>&& handler) {
|
|
_signal_handlers.emplace(std::piecewise_construct,
|
|
std::make_tuple(signo), std::make_tuple(signo, std::move(handler)));
|
|
}
|
|
|
|
void
|
|
reactor::signals::handle_signal_once(int signo, std::function<void ()>&& handler) {
|
|
return handle_signal(signo, [fired = false, handler = std::move(handler)] () mutable {
|
|
if (!fired) {
|
|
fired = true;
|
|
handler();
|
|
}
|
|
});
|
|
}
|
|
|
|
bool reactor::signals::poll_signal() {
|
|
auto signals = _pending_signals.load(std::memory_order_relaxed);
|
|
if (signals) {
|
|
_pending_signals.fetch_and(~signals, std::memory_order_relaxed);
|
|
for (size_t i = 0; i < sizeof(signals)*8; i++) {
|
|
if (signals & (1ull << i)) {
|
|
_signal_handlers.at(i)._handler();
|
|
}
|
|
}
|
|
}
|
|
return signals;
|
|
}
|
|
|
|
void reactor::signals::action(int signo, siginfo_t* siginfo, void* ignore) {
|
|
engine()._signals._pending_signals.fetch_or(1ull << signo, std::memory_order_relaxed);
|
|
}
|
|
|
|
inline int alarm_signal() {
|
|
// We don't want to use SIGALRM, because the boost unit test library
|
|
// also plays with it.
|
|
return SIGRTMIN;
|
|
}
|
|
|
|
reactor::reactor()
|
|
: _backend()
|
|
#ifdef HAVE_OSV
|
|
, _timer_thread(
|
|
[&] { timer_thread_func(); }, sched::thread::attr().stack(4096).name("timer_thread").pin(sched::cpu::current()))
|
|
, _engine_thread(sched::thread::current())
|
|
#endif
|
|
, _exit_future(_exit_promise.get_future())
|
|
, _cpu_started(0)
|
|
, _io_context(0)
|
|
, _io_context_available(max_aio)
|
|
, _reuseport(posix_reuseport_detect()) {
|
|
|
|
seastar::thread_impl::init();
|
|
auto r = ::io_setup(max_aio, &_io_context);
|
|
assert(r >= 0);
|
|
#ifdef HAVE_OSV
|
|
_timer_thread.start();
|
|
#else
|
|
struct sigevent sev;
|
|
sev.sigev_notify = SIGEV_THREAD_ID;
|
|
sev._sigev_un._tid = syscall(SYS_gettid);
|
|
sev.sigev_signo = alarm_signal();
|
|
r = timer_create(CLOCK_REALTIME, &sev, &_timer);
|
|
assert(r >= 0);
|
|
sigset_t mask;
|
|
sigemptyset(&mask);
|
|
sigaddset(&mask, alarm_signal());
|
|
r = ::sigprocmask(SIG_BLOCK, &mask, NULL);
|
|
assert(r == 0);
|
|
#endif
|
|
memory::set_reclaim_hook([this] (std::function<void ()> reclaim_fn) {
|
|
// push it in the front of the queue so we reclaim memory quickly
|
|
_pending_tasks.push_front(make_task([fn = std::move(reclaim_fn)] {
|
|
fn();
|
|
}));
|
|
});
|
|
}
|
|
|
|
reactor::~reactor() {
|
|
timer_delete(_timer);
|
|
auto eraser = [](auto& list) {
|
|
while (!list.empty()) {
|
|
auto& timer = *list.begin();
|
|
timer.cancel();
|
|
}
|
|
};
|
|
eraser(_expired_timers);
|
|
eraser(_expired_lowres_timers);
|
|
}
|
|
|
|
#ifdef HAVE_OSV
|
|
void reactor::timer_thread_func() {
|
|
sched::timer tmr(*sched::thread::current());
|
|
WITH_LOCK(_timer_mutex) {
|
|
while (!_stopped) {
|
|
if (_timer_due != 0) {
|
|
set_timer(tmr, _timer_due);
|
|
_timer_cond.wait(_timer_mutex, &tmr);
|
|
if (tmr.expired()) {
|
|
_timer_due = 0;
|
|
_engine_thread->unsafe_stop();
|
|
_pending_tasks.push_front(make_task([this] {
|
|
complete_timers(_timers, _expired_timers, [this] {
|
|
if (!_timers.empty()) {
|
|
enable_timer(_timers.get_next_timeout());
|
|
}
|
|
});
|
|
}));
|
|
_engine_thread->wake();
|
|
} else {
|
|
tmr.cancel();
|
|
}
|
|
} else {
|
|
_timer_cond.wait(_timer_mutex);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void reactor::set_timer(sched::timer &tmr, s64 t) {
|
|
using namespace osv::clock;
|
|
tmr.set(wall::time_point(std::chrono::nanoseconds(t)));
|
|
}
|
|
#endif
|
|
|
|
void reactor::configure(boost::program_options::variables_map vm) {
|
|
auto network_stack_ready = vm.count("network-stack")
|
|
? network_stack_registry::create(sstring(vm["network-stack"].as<std::string>()), vm)
|
|
: network_stack_registry::create(vm);
|
|
network_stack_ready.then([this] (std::unique_ptr<network_stack> stack) {
|
|
_network_stack_ready_promise.set_value(std::move(stack));
|
|
});
|
|
|
|
_handle_sigint = !vm.count("no-handle-interrupt");
|
|
_task_quota = vm["task-quota"].as<int>();
|
|
}
|
|
|
|
future<> reactor_backend_epoll::get_epoll_future(pollable_fd_state& pfd,
|
|
promise<> pollable_fd_state::*pr, int event) {
|
|
if (pfd.events_known & event) {
|
|
pfd.events_known &= ~event;
|
|
return make_ready_future();
|
|
}
|
|
pfd.events_requested |= event;
|
|
if (!(pfd.events_epoll & event)) {
|
|
auto ctl = pfd.events_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
|
|
pfd.events_epoll |= event;
|
|
::epoll_event eevt;
|
|
eevt.events = pfd.events_epoll;
|
|
eevt.data.ptr = &pfd;
|
|
int r = ::epoll_ctl(_epollfd.get(), ctl, pfd.fd.get(), &eevt);
|
|
assert(r == 0);
|
|
engine().start_epoll();
|
|
}
|
|
pfd.*pr = promise<>();
|
|
return (pfd.*pr).get_future();
|
|
}
|
|
|
|
void reactor_backend_epoll::abort_fd(pollable_fd_state& pfd, std::exception_ptr ex,
|
|
promise<> pollable_fd_state::* pr, int event) {
|
|
if (pfd.events_epoll & event) {
|
|
pfd.events_epoll &= ~event;
|
|
auto ctl = pfd.events_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
|
|
::epoll_event eevt;
|
|
eevt.events = pfd.events_epoll;
|
|
eevt.data.ptr = &pfd;
|
|
int r = ::epoll_ctl(_epollfd.get(), ctl, pfd.fd.get(), &eevt);
|
|
assert(r == 0);
|
|
}
|
|
if (pfd.events_requested & event) {
|
|
pfd.events_requested &= ~event;
|
|
(pfd.*pr).set_exception(std::move(ex));
|
|
}
|
|
pfd.events_known &= ~event;
|
|
}
|
|
|
|
future<> reactor_backend_epoll::readable(pollable_fd_state& fd) {
|
|
return get_epoll_future(fd, &pollable_fd_state::pollin, EPOLLIN);
|
|
}
|
|
|
|
future<> reactor_backend_epoll::writeable(pollable_fd_state& fd) {
|
|
return get_epoll_future(fd, &pollable_fd_state::pollout, EPOLLOUT);
|
|
}
|
|
|
|
void reactor_backend_epoll::abort_reader(pollable_fd_state& fd, std::exception_ptr ex) {
|
|
abort_fd(fd, std::move(ex), &pollable_fd_state::pollin, EPOLLIN);
|
|
}
|
|
|
|
void reactor_backend_epoll::abort_writer(pollable_fd_state& fd, std::exception_ptr ex) {
|
|
abort_fd(fd, std::move(ex), &pollable_fd_state::pollout, EPOLLOUT);
|
|
}
|
|
|
|
void reactor_backend_epoll::forget(pollable_fd_state& fd) {
|
|
if (fd.events_epoll) {
|
|
::epoll_ctl(_epollfd.get(), EPOLL_CTL_DEL, fd.fd.get(), nullptr);
|
|
}
|
|
}
|
|
|
|
future<> reactor_backend_epoll::notified(reactor_notifier *n) {
|
|
// Currently reactor_backend_epoll doesn't need to support notifiers,
|
|
// because we add to it file descriptors instead. But this can be fixed
|
|
// later.
|
|
std::cout << "reactor_backend_epoll does not yet support notifiers!\n";
|
|
abort();
|
|
}
|
|
|
|
|
|
pollable_fd
|
|
reactor::posix_listen(socket_address sa, listen_options opts) {
|
|
file_desc fd = file_desc::socket(sa.u.sa.sa_family, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
|
|
if (opts.reuse_address) {
|
|
fd.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1);
|
|
}
|
|
if (_reuseport)
|
|
fd.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1);
|
|
|
|
fd.bind(sa.u.sa, sizeof(sa.u.sas));
|
|
fd.listen(100);
|
|
return pollable_fd(std::move(fd));
|
|
}
|
|
|
|
bool
|
|
reactor::posix_reuseport_detect() {
|
|
try {
|
|
file_desc fd = file_desc::socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
|
|
fd.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1);
|
|
return true;
|
|
} catch(std::system_error& e) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
future<pollable_fd>
|
|
reactor::posix_connect(socket_address sa, socket_address local) {
|
|
file_desc fd = file_desc::socket(sa.u.sa.sa_family, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
|
|
fd.bind(local.u.sa, sizeof(sa.u.sas));
|
|
fd.connect(sa.u.sa, sizeof(sa.u.sas));
|
|
auto pfd = pollable_fd(std::move(fd));
|
|
auto f = pfd.writeable();
|
|
return f.then([pfd = std::move(pfd)] () mutable {
|
|
int err;
|
|
pfd.get_file_desc().getsockopt(SOL_SOCKET, SO_ERROR, err);
|
|
throw_system_error_on(err != 0);
|
|
return make_ready_future<pollable_fd>(std::move(pfd));
|
|
});
|
|
}
|
|
|
|
server_socket
|
|
reactor::listen(socket_address sa, listen_options opt) {
|
|
return _network_stack->listen(sa, opt);
|
|
}
|
|
|
|
future<connected_socket>
|
|
reactor::connect(socket_address sa) {
|
|
return _network_stack->connect(sa);
|
|
}
|
|
|
|
void reactor_backend_epoll::complete_epoll_event(pollable_fd_state& pfd, promise<> pollable_fd_state::*pr,
|
|
int events, int event) {
|
|
if (pfd.events_requested & events & event) {
|
|
pfd.events_requested &= ~event;
|
|
pfd.events_known &= ~event;
|
|
(pfd.*pr).set_value();
|
|
pfd.*pr = promise<>();
|
|
}
|
|
}
|
|
|
|
template <typename Func>
|
|
future<io_event>
|
|
reactor::submit_io(Func prepare_io) {
|
|
return _io_context_available.wait(1).then([this, prepare_io = std::move(prepare_io)] () mutable {
|
|
auto pr = std::make_unique<promise<io_event>>();
|
|
iocb io;
|
|
prepare_io(io);
|
|
io.data = pr.get();
|
|
_pending_aio.push_back(io);
|
|
if (_pending_aio.size() >= max_aio / 4) {
|
|
flush_pending_aio();
|
|
}
|
|
return pr.release()->get_future();
|
|
});
|
|
}
|
|
|
|
bool
|
|
reactor::flush_pending_aio() {
|
|
while (!_pending_aio.empty()) {
|
|
auto nr = _pending_aio.size();
|
|
struct iocb* iocbs[max_aio];
|
|
for (size_t i = 0; i < nr; ++i) {
|
|
iocbs[i] = &_pending_aio[i];
|
|
}
|
|
auto r = ::io_submit(_io_context, nr, iocbs);
|
|
throw_kernel_error(r);
|
|
if (size_t(r) == nr) {
|
|
_pending_aio.clear();
|
|
} else {
|
|
_pending_aio.erase(_pending_aio.begin(), _pending_aio.begin() + r);
|
|
}
|
|
}
|
|
return false; // We always submit all pending aios
|
|
}
|
|
|
|
template <typename Func>
|
|
future<io_event>
|
|
reactor::submit_io_read(Func prepare_io) {
|
|
++_aio_reads;
|
|
return submit_io(std::move(prepare_io));
|
|
}
|
|
|
|
template <typename Func>
|
|
future<io_event>
|
|
reactor::submit_io_write(Func prepare_io) {
|
|
++_aio_writes;
|
|
return submit_io(std::move(prepare_io));
|
|
}
|
|
|
|
bool reactor::process_io()
|
|
{
|
|
io_event ev[max_aio];
|
|
struct timespec timeout = {0, 0};
|
|
auto n = ::io_getevents(_io_context, 1, max_aio, ev, &timeout);
|
|
assert(n >= 0);
|
|
for (size_t i = 0; i < size_t(n); ++i) {
|
|
auto pr = reinterpret_cast<promise<io_event>*>(ev[i].data);
|
|
pr->set_value(ev[i]);
|
|
delete pr;
|
|
}
|
|
_io_context_available.signal(n);
|
|
return n;
|
|
}
|
|
|
|
posix_file_impl::~posix_file_impl() {
|
|
if (_fd != -1) {
|
|
if (std::uncaught_exception()) {
|
|
std::cerr << "WARNING: closing file in reactor thread during exception recovery\n";
|
|
} else {
|
|
std::cerr << "WARNING: closing file in reactor thread\n";
|
|
}
|
|
::close(_fd);
|
|
}
|
|
}
|
|
|
|
future<size_t>
|
|
posix_file_impl::write_dma(uint64_t pos, const void* buffer, size_t len) {
|
|
return engine().submit_io_write([this, pos, buffer, len] (iocb& io) {
|
|
io_prep_pwrite(&io, _fd, const_cast<void*>(buffer), len, pos);
|
|
}).then([] (io_event ev) {
|
|
throw_kernel_error(long(ev.res));
|
|
return make_ready_future<size_t>(size_t(ev.res));
|
|
});
|
|
}
|
|
|
|
future<size_t>
|
|
posix_file_impl::write_dma(uint64_t pos, std::vector<iovec> iov) {
|
|
return engine().submit_io_write([this, pos, iov = std::move(iov)] (iocb& io) {
|
|
io_prep_pwritev(&io, _fd, iov.data(), iov.size(), pos);
|
|
}).then([] (io_event ev) {
|
|
throw_kernel_error(long(ev.res));
|
|
return make_ready_future<size_t>(size_t(ev.res));
|
|
});
|
|
}
|
|
|
|
future<size_t>
|
|
posix_file_impl::read_dma(uint64_t pos, void* buffer, size_t len) {
|
|
return engine().submit_io_read([this, pos, buffer, len] (iocb& io) {
|
|
io_prep_pread(&io, _fd, buffer, len, pos);
|
|
}).then([] (io_event ev) {
|
|
throw_kernel_error(long(ev.res));
|
|
return make_ready_future<size_t>(size_t(ev.res));
|
|
});
|
|
}
|
|
|
|
future<size_t>
|
|
posix_file_impl::read_dma(uint64_t pos, std::vector<iovec> iov) {
|
|
return engine().submit_io_read([this, pos, iov = std::move(iov)] (iocb& io) {
|
|
io_prep_preadv(&io, _fd, iov.data(), iov.size(), pos);
|
|
}).then([] (io_event ev) {
|
|
throw_kernel_error(long(ev.res));
|
|
return make_ready_future<size_t>(size_t(ev.res));
|
|
});
|
|
}
|
|
|
|
future<file>
|
|
reactor::open_file_dma(sstring name, open_flags flags) {
|
|
return _thread_pool.submit<syscall_result<int>>([name, flags] {
|
|
return wrap_syscall<int>(::open(name.c_str(), O_DIRECT | O_CLOEXEC | static_cast<int>(flags), S_IRWXU));
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
return make_ready_future<file>(file(sr.result));
|
|
});
|
|
}
|
|
|
|
future<>
|
|
reactor::remove_file(sstring pathname) {
|
|
return engine()._thread_pool.submit<syscall_result<int>>([this, pathname] {
|
|
return wrap_syscall<int>(::remove(pathname.c_str()));
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
return make_ready_future<>();
|
|
});
|
|
}
|
|
|
|
future<>
|
|
reactor::rename_file(sstring old_pathname, sstring new_pathname) {
|
|
return engine()._thread_pool.submit<syscall_result<int>>([this, old_pathname, new_pathname] {
|
|
return wrap_syscall<int>(::rename(old_pathname.c_str(), new_pathname.c_str()));
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
return make_ready_future<>();
|
|
});
|
|
}
|
|
|
|
directory_entry_type stat_to_entry_type(__mode_t type) {
|
|
if (S_ISDIR(type)) {
|
|
return directory_entry_type::directory;
|
|
}
|
|
if (S_ISBLK(type)) {
|
|
return directory_entry_type::block_device;
|
|
}
|
|
if (S_ISCHR(type)) {
|
|
return directory_entry_type::char_device;
|
|
}
|
|
if (S_ISFIFO(type)) {
|
|
return directory_entry_type::fifo;
|
|
}
|
|
if (S_ISLNK(type)) {
|
|
return directory_entry_type::link;
|
|
}
|
|
return directory_entry_type::regular;
|
|
|
|
}
|
|
|
|
future<std::experimental::optional<directory_entry_type>>
|
|
reactor::file_type(sstring name) {
|
|
return _thread_pool.submit<syscall_result_extra<struct stat>>([name] {
|
|
struct stat st;
|
|
auto ret = stat(name.c_str(), &st);
|
|
return wrap_syscall(ret, st);
|
|
}).then([] (syscall_result_extra<struct stat> sr) {
|
|
if (long(sr.result) == -1) {
|
|
if (sr.error != ENOENT && sr.error != ENOTDIR) {
|
|
sr.throw_if_error();
|
|
}
|
|
return make_ready_future<std::experimental::optional<directory_entry_type> >
|
|
(std::experimental::optional<directory_entry_type>() );
|
|
}
|
|
return make_ready_future<std::experimental::optional<directory_entry_type> >
|
|
(std::experimental::optional<directory_entry_type>(stat_to_entry_type(sr.extra.st_mode)) );
|
|
});
|
|
}
|
|
|
|
future<file>
|
|
reactor::open_directory(sstring name) {
|
|
return _thread_pool.submit<syscall_result<int>>([name] {
|
|
return wrap_syscall<int>(::open(name.c_str(), O_DIRECTORY | O_CLOEXEC | O_RDONLY));
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
return make_ready_future<file>(file(sr.result));
|
|
});
|
|
}
|
|
|
|
future<>
|
|
reactor::make_directory(sstring name) {
|
|
return _thread_pool.submit<syscall_result<int>>([name = std::move(name)] {
|
|
return wrap_syscall<int>(::mkdir(name.c_str(), S_IRWXU));
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
});
|
|
}
|
|
|
|
future<>
|
|
posix_file_impl::flush(void) {
|
|
++engine()._fsyncs;
|
|
return engine()._thread_pool.submit<syscall_result<int>>([this] {
|
|
return wrap_syscall<int>(::fsync(_fd));
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
return make_ready_future<>();
|
|
});
|
|
}
|
|
|
|
future<struct stat>
|
|
posix_file_impl::stat(void) {
|
|
return engine()._thread_pool.submit<syscall_result_extra<struct stat>>([this] {
|
|
struct stat st;
|
|
auto ret = ::fstat(_fd, &st);
|
|
return wrap_syscall(ret, st);
|
|
}).then([] (syscall_result_extra<struct stat> ret) {
|
|
ret.throw_if_error();
|
|
return make_ready_future<struct stat>(ret.extra);
|
|
});
|
|
}
|
|
|
|
future<>
|
|
posix_file_impl::truncate(uint64_t length) {
|
|
return engine()._thread_pool.submit<syscall_result<int>>([this, length] {
|
|
return wrap_syscall<int>(::ftruncate(_fd, length));
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
return make_ready_future<>();
|
|
});
|
|
}
|
|
|
|
future<>
|
|
blockdev_file_impl::truncate(uint64_t length) {
|
|
return make_ready_future<>();
|
|
}
|
|
|
|
future<>
|
|
posix_file_impl::discard(uint64_t offset, uint64_t length) {
|
|
return engine()._thread_pool.submit<syscall_result<int>>([this, offset, length] () mutable {
|
|
return wrap_syscall<int>(::fallocate(_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE,
|
|
offset, length));
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
return make_ready_future<>();
|
|
});
|
|
}
|
|
|
|
future<>
|
|
posix_file_impl::allocate(uint64_t position, uint64_t length) {
|
|
#ifdef FALLOC_FL_ZERO_RANGE
|
|
// FALLOC_FL_ZERO_RANGE is fairly new, so don't fail if it's not supported.
|
|
static bool supported = true;
|
|
if (!supported) {
|
|
return make_ready_future<>();
|
|
}
|
|
return engine()._thread_pool.submit<syscall_result<int>>([this, position, length] () mutable {
|
|
auto ret = ::fallocate(_fd, FALLOC_FL_ZERO_RANGE|FALLOC_FL_KEEP_SIZE, position, length);
|
|
if (ret == -1 && errno == EOPNOTSUPP) {
|
|
ret = 0;
|
|
supported = false; // Racy, but harmless. At most we issue an extra call or two.
|
|
}
|
|
return wrap_syscall<int>(ret);
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
return make_ready_future<>();
|
|
});
|
|
#else
|
|
return make_ready_future<>();
|
|
#endif
|
|
}
|
|
|
|
future<>
|
|
blockdev_file_impl::discard(uint64_t offset, uint64_t length) {
|
|
return engine()._thread_pool.submit<syscall_result<int>>([this, offset, length] () mutable {
|
|
uint64_t range[2] { offset, length };
|
|
return wrap_syscall<int>(::ioctl(_fd, BLKDISCARD, &range));
|
|
}).then([] (syscall_result<int> sr) {
|
|
sr.throw_if_error();
|
|
return make_ready_future<>();
|
|
});
|
|
}
|
|
|
|
future<>
|
|
blockdev_file_impl::allocate(uint64_t position, uint64_t length) {
|
|
// nothing to do for block device
|
|
return make_ready_future<>();
|
|
}
|
|
|
|
future<uint64_t>
|
|
posix_file_impl::size(void) {
|
|
auto r = ::lseek(_fd, 0, SEEK_END);
|
|
if (r == -1) {
|
|
return make_exception_future<uint64_t>(std::system_error(errno, std::system_category()));
|
|
}
|
|
return make_ready_future<uint64_t>(r);
|
|
}
|
|
|
|
future<>
|
|
posix_file_impl::close() {
|
|
return engine()._thread_pool.submit<syscall_result<int>>([fd = _fd] {
|
|
return wrap_syscall<int>(::close(fd));
|
|
}).then([this] (syscall_result<int> sr) {
|
|
_fd = -1;
|
|
sr.throw_if_error();
|
|
});
|
|
}
|
|
|
|
future<uint64_t>
|
|
blockdev_file_impl::size(void) {
|
|
return engine()._thread_pool.submit<syscall_result_extra<size_t>>([this] {
|
|
uint64_t size;
|
|
int ret = ::ioctl(_fd, BLKGETSIZE64, &size);
|
|
return wrap_syscall(ret, size);
|
|
}).then([] (syscall_result_extra<uint64_t> ret) {
|
|
ret.throw_if_error();
|
|
return make_ready_future<uint64_t>(ret.extra);
|
|
});
|
|
}
|
|
|
|
subscription<directory_entry>
|
|
posix_file_impl::list_directory(std::function<future<> (directory_entry de)> next) {
|
|
struct work {
|
|
stream<directory_entry> s;
|
|
unsigned current = 0;
|
|
unsigned total = 0;
|
|
bool eof = false;
|
|
int error = 0;
|
|
char buffer[8192];
|
|
};
|
|
|
|
// While it would be natural to use fdopendir()/readdir(),
|
|
// our syscall thread pool doesn't support malloc(), which is
|
|
// required for this to work. So resort to using getdents()
|
|
// instead.
|
|
|
|
// From getdents(2):
|
|
struct linux_dirent {
|
|
unsigned long d_ino; /* Inode number */
|
|
unsigned long d_off; /* Offset to next linux_dirent */
|
|
unsigned short d_reclen; /* Length of this linux_dirent */
|
|
char d_name[]; /* Filename (null-terminated) */
|
|
/* length is actually (d_reclen - 2 -
|
|
offsetof(struct linux_dirent, d_name)) */
|
|
/*
|
|
char pad; // Zero padding byte
|
|
char d_type; // File type (only since Linux
|
|
// 2.6.4); offset is (d_reclen - 1)
|
|
*/
|
|
};
|
|
|
|
auto w = make_lw_shared<work>();
|
|
auto ret = w->s.listen(std::move(next));
|
|
w->s.started().then([w, this] {
|
|
auto eofcond = [w] { return w->eof; };
|
|
return do_until(eofcond, [w, this] {
|
|
if (w->current == w->total) {
|
|
return engine()._thread_pool.submit<syscall_result<long>>([w , this] () {
|
|
auto ret = ::syscall(__NR_getdents, _fd, reinterpret_cast<linux_dirent*>(w->buffer), sizeof(w->buffer));
|
|
return wrap_syscall(ret);
|
|
}).then([w] (syscall_result<long> ret) {
|
|
ret.throw_if_error();
|
|
if (ret.result == 0) {
|
|
w->eof = true;
|
|
} else {
|
|
w->current = 0;
|
|
w->total = ret.result;
|
|
}
|
|
});
|
|
}
|
|
auto start = w->buffer + w->current;
|
|
auto de = reinterpret_cast<linux_dirent*>(start);
|
|
std::experimental::optional<directory_entry_type> type;
|
|
switch (start[de->d_reclen - 1]) {
|
|
case DT_BLK:
|
|
type = directory_entry_type::block_device;
|
|
break;
|
|
case DT_CHR:
|
|
type = directory_entry_type::char_device;
|
|
break;
|
|
case DT_DIR:
|
|
type = directory_entry_type::directory;
|
|
break;
|
|
case DT_FIFO:
|
|
type = directory_entry_type::fifo;
|
|
break;
|
|
case DT_REG:
|
|
type = directory_entry_type::regular;
|
|
break;
|
|
case DT_SOCK:
|
|
type = directory_entry_type::socket;
|
|
break;
|
|
default:
|
|
// unknown, ignore
|
|
;
|
|
}
|
|
w->current += de->d_reclen;
|
|
sstring name = de->d_name;
|
|
if (name == "." || name == "..") {
|
|
return make_ready_future<>();
|
|
}
|
|
return w->s.produce({std::move(name), type});
|
|
});
|
|
}).then([w] {
|
|
w->s.close();
|
|
});
|
|
return ret;
|
|
}
|
|
|
|
void reactor::enable_timer(clock_type::time_point when)
|
|
{
|
|
#ifndef HAVE_OSV
|
|
itimerspec its;
|
|
its.it_interval = {};
|
|
its.it_value = to_timespec(when);
|
|
auto ret = timer_settime(_timer, TIMER_ABSTIME, &its, NULL);
|
|
throw_system_error_on(ret == -1);
|
|
#else
|
|
using ns = std::chrono::nanoseconds;
|
|
WITH_LOCK(_timer_mutex) {
|
|
_timer_due = std::chrono::duration_cast<ns>(when.time_since_epoch()).count();
|
|
_timer_cond.wake_one();
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void reactor::add_timer(timer<>* tmr) {
|
|
if (queue_timer(tmr)) {
|
|
enable_timer(_timers.get_next_timeout());
|
|
}
|
|
}
|
|
|
|
bool reactor::queue_timer(timer<>* tmr) {
|
|
return _timers.insert(*tmr);
|
|
}
|
|
|
|
void reactor::del_timer(timer<>* tmr) {
|
|
if (tmr->_expired) {
|
|
_expired_timers.erase(_expired_timers.iterator_to(*tmr));
|
|
tmr->_expired = false;
|
|
} else {
|
|
_timers.remove(*tmr);
|
|
}
|
|
}
|
|
|
|
void reactor::add_timer(timer<lowres_clock>* tmr) {
|
|
if (queue_timer(tmr)) {
|
|
_lowres_next_timeout = _lowres_timers.get_next_timeout();
|
|
}
|
|
}
|
|
|
|
bool reactor::queue_timer(timer<lowres_clock>* tmr) {
|
|
return _lowres_timers.insert(*tmr);
|
|
}
|
|
|
|
void reactor::del_timer(timer<lowres_clock>* tmr) {
|
|
if (tmr->_expired) {
|
|
_expired_lowres_timers.erase(_expired_lowres_timers.iterator_to(*tmr));
|
|
tmr->_expired = false;
|
|
} else {
|
|
_lowres_timers.remove(*tmr);
|
|
}
|
|
}
|
|
|
|
future<> reactor::run_exit_tasks() {
|
|
_exit_promise.set_value();
|
|
return std::move(_exit_future);
|
|
}
|
|
|
|
void reactor::stop() {
|
|
assert(engine()._id == 0);
|
|
run_exit_tasks().then([this] {
|
|
auto sem = new semaphore(0);
|
|
for (unsigned i = 1; i < smp::count; i++) {
|
|
smp::submit_to<>(i, []() {
|
|
return engine().run_exit_tasks().then([] {
|
|
engine()._stopped = true;
|
|
});
|
|
}).then([sem, i]() {
|
|
sem->signal();
|
|
});
|
|
}
|
|
sem->wait(smp::count - 1).then([sem, this](){
|
|
_stopped = true;
|
|
delete sem;
|
|
});
|
|
});
|
|
}
|
|
|
|
void reactor::exit(int ret) {
|
|
smp::submit_to(0, [this, ret] { _return = ret; stop(); });
|
|
}
|
|
|
|
|
|
struct reactor::collectd_registrations {
|
|
scollectd::registrations regs;
|
|
};
|
|
|
|
reactor::collectd_registrations
|
|
reactor::register_collectd_metrics() {
|
|
return collectd_registrations{ {
|
|
// queue_length value:GAUGE:0:U
|
|
// Absolute value of num tasks in queue.
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("reactor"
|
|
, scollectd::per_cpu_plugin_instance
|
|
, "queue_length", "tasks-pending")
|
|
, scollectd::make_typed(scollectd::data_type::GAUGE
|
|
, std::bind(&decltype(_pending_tasks)::size, &_pending_tasks))
|
|
),
|
|
// total_operations value:DERIVE:0:U
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("reactor"
|
|
, scollectd::per_cpu_plugin_instance
|
|
, "total_operations", "tasks-processed")
|
|
, scollectd::make_typed(scollectd::data_type::DERIVE, _tasks_processed)
|
|
),
|
|
// queue_length value:GAUGE:0:U
|
|
// Absolute value of num timers in queue.
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("reactor"
|
|
, scollectd::per_cpu_plugin_instance
|
|
, "queue_length", "timers-pending")
|
|
, scollectd::make_typed(scollectd::data_type::GAUGE
|
|
, std::bind(&decltype(_timers)::size, &_timers))
|
|
),
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("reactor"
|
|
, scollectd::per_cpu_plugin_instance
|
|
, "queue_length", "idle")
|
|
, scollectd::make_typed(scollectd::data_type::GAUGE,
|
|
[this] () -> uint32_t { return _load * 100; })
|
|
),
|
|
// total_operations value:DERIVE:0:U
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("reactor"
|
|
, scollectd::per_cpu_plugin_instance
|
|
, "total_operations", "aio-reads")
|
|
, scollectd::make_typed(scollectd::data_type::DERIVE, _aio_reads)
|
|
),
|
|
// total_operations value:DERIVE:0:U
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("reactor"
|
|
, scollectd::per_cpu_plugin_instance
|
|
, "total_operations", "aio-writes")
|
|
, scollectd::make_typed(scollectd::data_type::DERIVE, _aio_writes)
|
|
),
|
|
// total_operations value:DERIVE:0:U
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("reactor"
|
|
, scollectd::per_cpu_plugin_instance
|
|
, "total_operations", "fsyncs")
|
|
, scollectd::make_typed(scollectd::data_type::DERIVE, _fsyncs)
|
|
),
|
|
// total_operations value:DERIVE:0:U
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("reactor"
|
|
, scollectd::per_cpu_plugin_instance
|
|
, "total_operations", "io-threaded-fallbacks")
|
|
, scollectd::make_typed(scollectd::data_type::DERIVE,
|
|
std::bind(&thread_pool::operation_count, &_thread_pool))
|
|
),
|
|
scollectd::add_polled_metric(
|
|
scollectd::type_instance_id("memory",
|
|
scollectd::per_cpu_plugin_instance,
|
|
"total_operations", "malloc"),
|
|
scollectd::make_typed(scollectd::data_type::DERIVE,
|
|
[] { return memory::stats().mallocs(); })
|
|
),
|
|
scollectd::add_polled_metric(
|
|
scollectd::type_instance_id("memory",
|
|
scollectd::per_cpu_plugin_instance,
|
|
"total_operations", "free"),
|
|
scollectd::make_typed(scollectd::data_type::DERIVE,
|
|
[] { return memory::stats().frees(); })
|
|
),
|
|
scollectd::add_polled_metric(
|
|
scollectd::type_instance_id("memory",
|
|
scollectd::per_cpu_plugin_instance,
|
|
"total_operations", "cross_cpu_free"),
|
|
scollectd::make_typed(scollectd::data_type::DERIVE,
|
|
[] { return memory::stats().cross_cpu_frees(); })
|
|
),
|
|
scollectd::add_polled_metric(
|
|
scollectd::type_instance_id("memory",
|
|
scollectd::per_cpu_plugin_instance,
|
|
"objects", "malloc"),
|
|
scollectd::make_typed(scollectd::data_type::GAUGE,
|
|
[] { return memory::stats().live_objects(); })
|
|
),
|
|
scollectd::add_polled_metric(
|
|
scollectd::type_instance_id("memory",
|
|
scollectd::per_cpu_plugin_instance,
|
|
"memory", "free_memory"),
|
|
scollectd::make_typed(scollectd::data_type::GAUGE,
|
|
[] { return memory::stats().free_memory(); })
|
|
),
|
|
scollectd::add_polled_metric(
|
|
scollectd::type_instance_id("memory",
|
|
scollectd::per_cpu_plugin_instance,
|
|
"total_operations", "reclaims"),
|
|
scollectd::make_typed(scollectd::data_type::DERIVE,
|
|
[] { return memory::stats().reclaims(); })
|
|
),
|
|
} };
|
|
}
|
|
|
|
void reactor::run_tasks(circular_buffer<std::unique_ptr<task>>& tasks, size_t quota) {
|
|
task_quota = quota;
|
|
while (!tasks.empty() && task_quota) {
|
|
--task_quota;
|
|
auto tsk = std::move(tasks.front());
|
|
tasks.pop_front();
|
|
tsk->run();
|
|
tsk.reset();
|
|
++_tasks_processed;
|
|
}
|
|
}
|
|
|
|
int reactor::run() {
|
|
auto collectd_metrics = register_collectd_metrics();
|
|
|
|
#ifndef HAVE_OSV
|
|
poller io_poller([&] { return process_io(); });
|
|
#endif
|
|
|
|
poller sig_poller([&] { return _signals.poll_signal(); } );
|
|
poller aio_poller(std::bind(&reactor::flush_pending_aio, this));
|
|
|
|
if (_id == 0) {
|
|
if (_handle_sigint) {
|
|
_signals.handle_signal_once(SIGINT, [this] { stop(); });
|
|
}
|
|
_signals.handle_signal_once(SIGTERM, [this] { stop(); });
|
|
}
|
|
|
|
_cpu_started.wait(smp::count).then([this] {
|
|
_network_stack->initialize().then([this] {
|
|
_start_promise.set_value();
|
|
});
|
|
});
|
|
_network_stack_ready_promise.get_future().then([this] (std::unique_ptr<network_stack> stack) {
|
|
_network_stack = std::move(stack);
|
|
for (unsigned c = 0; c < smp::count; c++) {
|
|
smp::submit_to(c, [] {
|
|
engine()._cpu_started.signal();
|
|
});
|
|
}
|
|
});
|
|
|
|
// Register smp queues poller
|
|
std::experimental::optional<poller> smp_poller;
|
|
if (smp::count > 1) {
|
|
smp_poller = poller(smp::poll_queues);
|
|
}
|
|
|
|
#ifndef HAVE_OSV
|
|
_signals.handle_signal(alarm_signal(), [this] {
|
|
complete_timers(_timers, _expired_timers, [this] {
|
|
if (!_timers.empty()) {
|
|
enable_timer(_timers.get_next_timeout());
|
|
}
|
|
});
|
|
});
|
|
#endif
|
|
|
|
poller drain_cross_cpu_freelist([] {
|
|
return memory::drain_cross_cpu_freelist();
|
|
});
|
|
|
|
poller expire_lowres_timers([this] {
|
|
if (_lowres_next_timeout == lowres_clock::time_point()) {
|
|
return false;
|
|
}
|
|
auto now = lowres_clock::now();
|
|
if (now > _lowres_next_timeout) {
|
|
complete_timers(_lowres_timers, _expired_lowres_timers, [this] {
|
|
if (!_lowres_timers.empty()) {
|
|
_lowres_next_timeout = _lowres_timers.get_next_timeout();
|
|
} else {
|
|
_lowres_next_timeout = lowres_clock::time_point();
|
|
}
|
|
});
|
|
return true;
|
|
}
|
|
return false;
|
|
});
|
|
|
|
using namespace std::chrono_literals;
|
|
timer<lowres_clock> load_timer;
|
|
std::chrono::high_resolution_clock::rep idle_count = 0;
|
|
auto idle_start = std::chrono::high_resolution_clock::now(), idle_end = idle_start;
|
|
load_timer.set_callback([this, &idle_count, &idle_start, &idle_end] () mutable {
|
|
auto load = double(idle_count + (idle_end - idle_start).count()) / double(std::chrono::duration_cast<std::chrono::high_resolution_clock::duration>(1s).count());
|
|
load = std::min(load, 1.0);
|
|
idle_count = 0;
|
|
idle_start = idle_end;
|
|
_loads.push_front(load);
|
|
if (_loads.size() > 5) {
|
|
auto drop = _loads.back();
|
|
_loads.pop_back();
|
|
_load -= (drop/5);
|
|
}
|
|
_load += (load/5);
|
|
});
|
|
load_timer.arm_periodic(1s);
|
|
|
|
bool idle = false;
|
|
|
|
while (true) {
|
|
run_tasks(_pending_tasks, _task_quota);
|
|
if (_stopped) {
|
|
load_timer.cancel();
|
|
run_tasks(_at_destroy_tasks, _at_destroy_tasks.size());
|
|
if (_id == 0) {
|
|
smp::join_all();
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (!poll_once() && _pending_tasks.empty()) {
|
|
idle_end = std::chrono::high_resolution_clock::now();
|
|
if (!idle) {
|
|
idle_start = idle_end;
|
|
idle = true;
|
|
}
|
|
_mm_pause();
|
|
} else {
|
|
if (idle) {
|
|
idle_count += (idle_end - idle_start).count();
|
|
idle = false;
|
|
}
|
|
}
|
|
}
|
|
return _return;
|
|
}
|
|
|
|
bool
|
|
reactor::poll_once() {
|
|
bool work = false;
|
|
for (auto c : _pollers) {
|
|
work |= c->poll_and_check_more_work();
|
|
}
|
|
|
|
return work;
|
|
}
|
|
|
|
class reactor::poller::registration_task : public task {
|
|
private:
|
|
poller* _p;
|
|
public:
|
|
explicit registration_task(poller* p) : _p(p) {}
|
|
virtual void run() noexcept override {
|
|
if (_p) {
|
|
engine().register_poller(_p->_pollfn.get());
|
|
_p->_registration_task = nullptr;
|
|
}
|
|
}
|
|
void cancel() {
|
|
_p = nullptr;
|
|
}
|
|
void moved(poller* p) {
|
|
_p = p;
|
|
}
|
|
};
|
|
|
|
class reactor::poller::deregistration_task : public task {
|
|
private:
|
|
std::unique_ptr<pollfn> _p;
|
|
public:
|
|
explicit deregistration_task(std::unique_ptr<pollfn>&& p) : _p(std::move(p)) {}
|
|
virtual void run() noexcept override {
|
|
engine().unregister_poller(_p.get());
|
|
}
|
|
};
|
|
|
|
void reactor::register_poller(pollfn* p) {
|
|
_pollers.push_back(p);
|
|
}
|
|
|
|
void reactor::unregister_poller(pollfn* p) {
|
|
_pollers.erase(std::find(_pollers.begin(), _pollers.end(), p));
|
|
}
|
|
|
|
void reactor::replace_poller(pollfn* old, pollfn* neww) {
|
|
std::replace(_pollers.begin(), _pollers.end(), old, neww);
|
|
}
|
|
|
|
reactor::poller::poller(poller&& x)
|
|
: _pollfn(std::move(x._pollfn)), _registration_task(x._registration_task) {
|
|
if (_pollfn && _registration_task) {
|
|
_registration_task->moved(this);
|
|
}
|
|
}
|
|
|
|
reactor::poller&
|
|
reactor::poller::operator=(poller&& x) {
|
|
if (this != &x) {
|
|
this->~poller();
|
|
new (this) poller(std::move(x));
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
void
|
|
reactor::poller::do_register() {
|
|
// We can't just insert a poller into reactor::_pollers, because we
|
|
// may be running inside a poller ourselves, and so in the middle of
|
|
// iterating reactor::_pollers itself. So we schedule a task to add
|
|
// the poller instead.
|
|
auto task = std::make_unique<registration_task>(this);
|
|
auto tmp = task.get();
|
|
engine().add_task(std::move(task));
|
|
_registration_task = tmp;
|
|
}
|
|
|
|
reactor::poller::~poller() {
|
|
// We can't just remove the poller from reactor::_pollers, because we
|
|
// may be running inside a poller ourselves, and so in the middle of
|
|
// iterating reactor::_pollers itself. So we schedule a task to remove
|
|
// the poller instead.
|
|
//
|
|
// Since we don't want to call the poller after we exit the destructor,
|
|
// we replace it atomically with another one, and schedule a task to
|
|
// delete the replacement.
|
|
if (_pollfn) {
|
|
if (_registration_task) {
|
|
// not added yet, so don't do it at all.
|
|
_registration_task->cancel();
|
|
} else {
|
|
auto dummy = make_pollfn([] { return false; });
|
|
auto dummy_p = dummy.get();
|
|
auto task = std::make_unique<deregistration_task>(std::move(dummy));
|
|
engine().add_task(std::move(task));
|
|
engine().replace_poller(_pollfn.get(), dummy_p);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool
|
|
reactor_backend_epoll::wait_and_process() {
|
|
std::array<epoll_event, 128> eevt;
|
|
int nr = ::epoll_wait(_epollfd.get(), eevt.data(), eevt.size(), 0);
|
|
if (nr == -1 && errno == EINTR) {
|
|
return false; // gdb can cause this
|
|
}
|
|
assert(nr != -1);
|
|
for (int i = 0; i < nr; ++i) {
|
|
auto& evt = eevt[i];
|
|
auto pfd = reinterpret_cast<pollable_fd_state*>(evt.data.ptr);
|
|
auto events = evt.events & (EPOLLIN | EPOLLOUT);
|
|
auto events_to_remove = events & ~pfd->events_requested;
|
|
complete_epoll_event(*pfd, &pollable_fd_state::pollin, events, EPOLLIN);
|
|
complete_epoll_event(*pfd, &pollable_fd_state::pollout, events, EPOLLOUT);
|
|
if (events_to_remove) {
|
|
pfd->events_epoll &= ~events_to_remove;
|
|
evt.events = pfd->events_epoll;
|
|
auto op = evt.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
|
|
::epoll_ctl(_epollfd.get(), op, pfd->fd.get(), &evt);
|
|
}
|
|
}
|
|
return nr;
|
|
}
|
|
|
|
syscall_work_queue::syscall_work_queue()
|
|
: _pending()
|
|
, _completed()
|
|
, _start_eventfd(0) {
|
|
}
|
|
|
|
void syscall_work_queue::submit_item(syscall_work_queue::work_item* item) {
|
|
_queue_has_room.wait().then([this, item] {
|
|
_pending.push(item);
|
|
_start_eventfd.signal(1);
|
|
});
|
|
}
|
|
|
|
void syscall_work_queue::complete() {
|
|
auto nr = _completed.consume_all([this] (work_item* wi) {
|
|
wi->complete();
|
|
delete wi;
|
|
});
|
|
_queue_has_room.signal(nr);
|
|
}
|
|
|
|
smp_message_queue::smp_message_queue()
|
|
: _pending()
|
|
, _completed()
|
|
{
|
|
}
|
|
|
|
void smp_message_queue::move_pending() {
|
|
auto queue_room = queue_length - _current_queue_length;
|
|
auto nr = std::min(queue_room, _tx.a.pending_fifo.size());
|
|
if (!nr) {
|
|
return;
|
|
}
|
|
auto begin = _tx.a.pending_fifo.begin();
|
|
auto end = begin + nr;
|
|
_pending.push(begin, end);
|
|
_tx.a.pending_fifo.erase(begin, end);
|
|
_current_queue_length += nr;
|
|
_last_snt_batch = nr;
|
|
_sent += nr;
|
|
}
|
|
|
|
void smp_message_queue::submit_item(smp_message_queue::work_item* item) {
|
|
_tx.a.pending_fifo.push_back(item);
|
|
if (_tx.a.pending_fifo.size() >= batch_size) {
|
|
move_pending();
|
|
}
|
|
}
|
|
|
|
void smp_message_queue::respond(work_item* item) {
|
|
_completed_fifo.push_back(item);
|
|
if (_completed_fifo.size() >= batch_size || engine()._stopped) {
|
|
flush_response_batch();
|
|
}
|
|
}
|
|
|
|
void smp_message_queue::flush_response_batch() {
|
|
if (!_completed_fifo.empty()) {
|
|
_completed.push(_completed_fifo.begin(), _completed_fifo.end());
|
|
_completed_fifo.clear();
|
|
}
|
|
}
|
|
|
|
template<size_t PrefetchCnt, typename Func>
|
|
size_t smp_message_queue::process_queue(lf_queue& q, Func process) {
|
|
// copy batch to local memory in order to minimize
|
|
// time in which cross-cpu data is accessed
|
|
work_item* items[queue_length + PrefetchCnt];
|
|
work_item* wi;
|
|
if (!q.pop(wi))
|
|
return 0;
|
|
// start prefecthing first item before popping the rest to overlap memory
|
|
// access with potential cache miss the second pop may cause
|
|
prefetch<2>(wi);
|
|
auto nr = q.pop(items);
|
|
std::fill(std::begin(items) + nr, std::begin(items) + nr + PrefetchCnt, nr ? items[nr - 1] : wi);
|
|
unsigned i = 0;
|
|
do {
|
|
prefetch_n<2>(std::begin(items) + i, std::begin(items) + i + PrefetchCnt);
|
|
process(wi);
|
|
wi = items[i++];
|
|
} while(i <= nr);
|
|
|
|
return nr + 1;
|
|
}
|
|
|
|
size_t smp_message_queue::process_completions() {
|
|
auto nr = process_queue<prefetch_cnt*2>(_completed, [] (work_item* wi) {
|
|
wi->complete();
|
|
delete wi;
|
|
});
|
|
_current_queue_length -= nr;
|
|
_compl += nr;
|
|
_last_cmpl_batch = nr;
|
|
|
|
return nr;
|
|
}
|
|
|
|
void smp_message_queue::flush_request_batch() {
|
|
move_pending();
|
|
}
|
|
|
|
size_t smp_message_queue::process_incoming() {
|
|
auto nr = process_queue<prefetch_cnt>(_pending, [this] (work_item* wi) {
|
|
wi->process().then([this, wi] {
|
|
respond(wi);
|
|
});
|
|
});
|
|
_received += nr;
|
|
_last_rcv_batch = nr;
|
|
return nr;
|
|
}
|
|
|
|
void smp_message_queue::start(unsigned cpuid) {
|
|
_tx.init();
|
|
char instance[10];
|
|
std::snprintf(instance, sizeof(instance), "%u-%u", engine().cpu_id(), cpuid);
|
|
_collectd_regs = scollectd::registrations({
|
|
// queue_length value:GAUGE:0:U
|
|
// Absolute value of num packets in last tx batch.
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("smp"
|
|
, instance
|
|
, "queue_length", "send-batch")
|
|
, scollectd::make_typed(scollectd::data_type::GAUGE, _last_snt_batch)
|
|
),
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("smp"
|
|
, instance
|
|
, "queue_length", "receive-batch")
|
|
, scollectd::make_typed(scollectd::data_type::GAUGE, _last_rcv_batch)
|
|
),
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("smp"
|
|
, instance
|
|
, "queue_length", "complete-batch")
|
|
, scollectd::make_typed(scollectd::data_type::GAUGE, _last_cmpl_batch)
|
|
),
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("smp"
|
|
, instance
|
|
, "queue_length", "send-queue-length")
|
|
, scollectd::make_typed(scollectd::data_type::GAUGE, _current_queue_length)
|
|
),
|
|
// total_operations value:DERIVE:0:U
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("smp"
|
|
, instance
|
|
, "total_operations", "received-messages")
|
|
, scollectd::make_typed(scollectd::data_type::DERIVE, _received)
|
|
),
|
|
// total_operations value:DERIVE:0:U
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("smp"
|
|
, instance
|
|
, "total_operations", "sent-messages")
|
|
, scollectd::make_typed(scollectd::data_type::DERIVE, _sent)
|
|
),
|
|
// total_operations value:DERIVE:0:U
|
|
scollectd::add_polled_metric(scollectd::type_instance_id("smp"
|
|
, instance
|
|
, "total_operations", "completed-messages")
|
|
, scollectd::make_typed(scollectd::data_type::DERIVE, _compl)
|
|
),
|
|
});
|
|
}
|
|
|
|
/* not yet implemented for OSv. TODO: do the notification like we do class smp. */
|
|
#ifndef HAVE_OSV
|
|
thread_pool::thread_pool() : _worker_thread([this] { work(); }), _notify(pthread_self()) {
|
|
engine()._signals.handle_signal(SIGUSR1, [this] { inter_thread_wq.complete(); });
|
|
}
|
|
|
|
void thread_pool::work() {
|
|
sigset_t mask;
|
|
sigfillset(&mask);
|
|
auto r = ::sigprocmask(SIG_BLOCK, &mask, NULL);
|
|
throw_system_error_on(r == -1);
|
|
while (true) {
|
|
uint64_t count;
|
|
auto r = ::read(inter_thread_wq._start_eventfd.get_read_fd(), &count, sizeof(count));
|
|
assert(r == sizeof(count));
|
|
if (_stopped.load(std::memory_order_relaxed)) {
|
|
break;
|
|
}
|
|
inter_thread_wq._pending.consume_all([this] (syscall_work_queue::work_item* wi) {
|
|
wi->process();
|
|
inter_thread_wq._completed.push(wi);
|
|
});
|
|
pthread_kill(_notify, SIGUSR1);
|
|
}
|
|
}
|
|
|
|
thread_pool::~thread_pool() {
|
|
_stopped.store(true, std::memory_order_relaxed);
|
|
inter_thread_wq._start_eventfd.signal(1);
|
|
_worker_thread.join();
|
|
}
|
|
#endif
|
|
|
|
readable_eventfd writeable_eventfd::read_side() {
|
|
return readable_eventfd(_fd.dup());
|
|
}
|
|
|
|
file_desc writeable_eventfd::try_create_eventfd(size_t initial) {
|
|
assert(size_t(int(initial)) == initial);
|
|
return file_desc::eventfd(initial, EFD_CLOEXEC);
|
|
}
|
|
|
|
void writeable_eventfd::signal(size_t count) {
|
|
uint64_t c = count;
|
|
auto r = _fd.write(&c, sizeof(c));
|
|
assert(r == sizeof(c));
|
|
}
|
|
|
|
writeable_eventfd readable_eventfd::write_side() {
|
|
return writeable_eventfd(_fd.get_file_desc().dup());
|
|
}
|
|
|
|
file_desc readable_eventfd::try_create_eventfd(size_t initial) {
|
|
assert(size_t(int(initial)) == initial);
|
|
return file_desc::eventfd(initial, EFD_CLOEXEC | EFD_NONBLOCK);
|
|
}
|
|
|
|
future<size_t> readable_eventfd::wait() {
|
|
return engine().readable(*_fd._s).then([this] {
|
|
uint64_t count;
|
|
int r = ::read(_fd.get_fd(), &count, sizeof(count));
|
|
assert(r == sizeof(count));
|
|
return make_ready_future<size_t>(count);
|
|
});
|
|
}
|
|
|
|
void schedule(std::unique_ptr<task> t) {
|
|
engine().add_task(std::move(t));
|
|
}
|
|
|
|
bool operator==(const ::sockaddr_in a, const ::sockaddr_in b) {
|
|
return (a.sin_addr.s_addr == b.sin_addr.s_addr) && (a.sin_port == b.sin_port);
|
|
}
|
|
|
|
void network_stack_registry::register_stack(sstring name,
|
|
boost::program_options::options_description opts,
|
|
std::function<future<std::unique_ptr<network_stack>> (options opts)> create, bool make_default) {
|
|
_map()[name] = std::move(create);
|
|
options_description().add(opts);
|
|
if (make_default) {
|
|
_default() = name;
|
|
}
|
|
}
|
|
|
|
sstring network_stack_registry::default_stack() {
|
|
return _default();
|
|
}
|
|
|
|
std::vector<sstring> network_stack_registry::list() {
|
|
std::vector<sstring> ret;
|
|
for (auto&& ns : _map()) {
|
|
ret.push_back(ns.first);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
future<std::unique_ptr<network_stack>>
|
|
network_stack_registry::create(options opts) {
|
|
return create(_default(), opts);
|
|
}
|
|
|
|
future<std::unique_ptr<network_stack>>
|
|
network_stack_registry::create(sstring name, options opts) {
|
|
return _map()[name](opts);
|
|
}
|
|
|
|
boost::program_options::options_description
|
|
reactor::get_options_description() {
|
|
namespace bpo = boost::program_options;
|
|
bpo::options_description opts("Core options");
|
|
auto net_stack_names = network_stack_registry::list();
|
|
opts.add_options()
|
|
("network-stack", bpo::value<std::string>(),
|
|
sprint("select network stack (valid values: %s)",
|
|
format_separated(net_stack_names.begin(), net_stack_names.end(), ", ")).c_str())
|
|
("no-handle-interrupt", "ignore SIGINT (for gdb)")
|
|
("task-quota", bpo::value<int>()->default_value(200), "Max number of tasks executed between polls and in loops")
|
|
;
|
|
opts.add(network_stack_registry::options_description());
|
|
return opts;
|
|
}
|
|
|
|
// We need a wrapper class, because boost::program_options wants validate()
|
|
// (below) to be in the same namespace as the type it is validating.
|
|
struct cpuset_wrapper {
|
|
resource::cpuset value;
|
|
};
|
|
|
|
// Overload for boost program options parsing/validation
|
|
void validate(boost::any& v,
|
|
const std::vector<std::string>& values,
|
|
cpuset_wrapper* target_type, int) {
|
|
using namespace boost::program_options;
|
|
static std::regex r("(\\d+-)?(\\d+)(,(\\d+-)?(\\d+))*");
|
|
validators::check_first_occurrence(v);
|
|
// Extract the first string from 'values'. If there is more than
|
|
// one string, it's an error, and exception will be thrown.
|
|
auto&& s = validators::get_single_string(values);
|
|
std::smatch match;
|
|
if (std::regex_match(s, match, r)) {
|
|
std::vector<std::string> ranges;
|
|
boost::split(ranges, s, boost::is_any_of(","));
|
|
cpuset_wrapper ret;
|
|
for (auto&& range: ranges) {
|
|
std::string beg = range;
|
|
std::string end = range;
|
|
auto dash = range.find('-');
|
|
if (dash != range.npos) {
|
|
beg = range.substr(0, dash);
|
|
end = range.substr(dash + 1);
|
|
}
|
|
auto b = boost::lexical_cast<unsigned>(beg);
|
|
auto e = boost::lexical_cast<unsigned>(end);
|
|
if (b > e) {
|
|
throw validation_error(validation_error::invalid_option_value);
|
|
}
|
|
for (auto i = b; i <= e; ++i) {
|
|
std::cout << "adding " << i << "\n";
|
|
ret.value.insert(i);
|
|
}
|
|
}
|
|
v = std::move(ret);
|
|
} else {
|
|
throw validation_error(validation_error::invalid_option_value);
|
|
}
|
|
}
|
|
|
|
boost::program_options::options_description
|
|
smp::get_options_description()
|
|
{
|
|
namespace bpo = boost::program_options;
|
|
bpo::options_description opts("SMP options");
|
|
opts.add_options()
|
|
("smp,c", bpo::value<unsigned>(), "number of threads (default: one per CPU)")
|
|
("cpuset", bpo::value<cpuset_wrapper>(), "CPUs to use (in cpuset(7) format; default: all))")
|
|
("memory,m", bpo::value<std::string>(), "memory to use, in bytes (ex: 4G) (default: all)")
|
|
("reserve-memory", bpo::value<std::string>()->default_value("512M"), "memory reserved to OS")
|
|
("hugepages", bpo::value<std::string>(), "path to accessible hugetlbfs mount (typically /dev/hugepages/something)")
|
|
;
|
|
return opts;
|
|
}
|
|
|
|
std::vector<smp::thread_adaptor> smp::_threads;
|
|
smp_message_queue** smp::_qs;
|
|
std::thread::id smp::_tmain;
|
|
unsigned smp::count = 1;
|
|
|
|
void smp::start_all_queues()
|
|
{
|
|
for (unsigned c = 0; c < count; c++) {
|
|
if (c != engine().cpu_id()) {
|
|
_qs[c][engine().cpu_id()].start(c);
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef HAVE_DPDK
|
|
|
|
int dpdk_thread_adaptor(void* f)
|
|
{
|
|
(*static_cast<std::function<void ()>*>(f))();
|
|
return 0;
|
|
}
|
|
|
|
void smp::join_all()
|
|
{
|
|
rte_eal_mp_wait_lcore();
|
|
}
|
|
|
|
void smp::pin(unsigned cpu_id) {
|
|
}
|
|
#else
|
|
void smp::join_all()
|
|
{
|
|
for (auto&& t: smp::_threads) {
|
|
t.join();
|
|
}
|
|
}
|
|
|
|
void smp::pin(unsigned cpu_id) {
|
|
pin_this_thread(cpu_id);
|
|
}
|
|
#endif
|
|
|
|
void smp::allocate_reactor() {
|
|
struct reactor_deleter {
|
|
void operator()(reactor* p) {
|
|
p->~reactor();
|
|
free(p);
|
|
}
|
|
};
|
|
static thread_local std::unique_ptr<reactor, reactor_deleter>
|
|
reactor_holder;
|
|
|
|
assert(!reactor_holder);
|
|
|
|
// we cannot just write "local_engin = new reactor" since reactor's constructor
|
|
// uses local_engine
|
|
void *buf;
|
|
int r = posix_memalign(&buf, 64, sizeof(reactor));
|
|
assert(r == 0);
|
|
local_engine = reinterpret_cast<reactor*>(buf);
|
|
new (buf) reactor;
|
|
reactor_holder.reset(local_engine);
|
|
}
|
|
|
|
void smp::cleanup() {
|
|
smp::_threads = std::vector<thread_adaptor>();
|
|
}
|
|
|
|
void smp::configure(boost::program_options::variables_map configuration)
|
|
{
|
|
smp::count = 1;
|
|
smp::_tmain = std::this_thread::get_id();
|
|
auto nr_cpus = resource::nr_processing_units();
|
|
resource::cpuset cpu_set;
|
|
std::copy(boost::counting_iterator<unsigned>(0), boost::counting_iterator<unsigned>(nr_cpus),
|
|
std::inserter(cpu_set, cpu_set.end()));
|
|
if (configuration.count("cpuset")) {
|
|
cpu_set = configuration["cpuset"].as<cpuset_wrapper>().value;
|
|
}
|
|
if (configuration.count("smp")) {
|
|
nr_cpus = configuration["smp"].as<unsigned>();
|
|
} else {
|
|
nr_cpus = cpu_set.size();
|
|
}
|
|
smp::count = nr_cpus;
|
|
resource::configuration rc;
|
|
if (configuration.count("memory")) {
|
|
rc.total_memory = parse_memory_size(configuration["memory"].as<std::string>());
|
|
#ifdef HAVE_DPDK
|
|
if (configuration.count("hugepages") &&
|
|
!configuration["network-stack"].as<std::string>().compare("native") &&
|
|
configuration.count("dpdk-pmd")) {
|
|
size_t dpdk_memory = dpdk::eal::mem_size(smp::count);
|
|
|
|
if (dpdk_memory >= rc.total_memory) {
|
|
std::cerr<<"Can't run with the given amount of memory: ";
|
|
std::cerr<<configuration["memory"].as<std::string>();
|
|
std::cerr<<". Consider giving more."<<std::endl;
|
|
exit(1);
|
|
}
|
|
|
|
//
|
|
// Subtract the memory we are about to give to DPDK from the total
|
|
// amount of memory we are allowed to use.
|
|
//
|
|
rc.total_memory.value() -= dpdk_memory;
|
|
}
|
|
#endif
|
|
}
|
|
if (configuration.count("reserve-memory")) {
|
|
rc.reserve_memory = parse_memory_size(configuration["reserve-memory"].as<std::string>());
|
|
}
|
|
std::experimental::optional<std::string> hugepages_path;
|
|
if (configuration.count("hugepages")) {
|
|
hugepages_path = configuration["hugepages"].as<std::string>();
|
|
}
|
|
rc.cpus = smp::count;
|
|
rc.cpu_set = std::move(cpu_set);
|
|
std::vector<resource::cpu> allocations = resource::allocate(rc);
|
|
smp::pin(allocations[0].cpu_id);
|
|
memory::configure(allocations[0].mem, hugepages_path);
|
|
smp::_qs = new smp_message_queue* [smp::count];
|
|
for(unsigned i = 0; i < smp::count; i++) {
|
|
smp::_qs[i] = new smp_message_queue[smp::count];
|
|
}
|
|
|
|
#ifdef HAVE_DPDK
|
|
dpdk::eal::cpuset cpus;
|
|
for (auto&& a : allocations) {
|
|
cpus[a.cpu_id] = true;
|
|
}
|
|
dpdk::eal::init(cpus, configuration);
|
|
#endif
|
|
|
|
// Better to put it into the smp class, but at smp construction time
|
|
// correct smp::count is not known.
|
|
static boost::barrier inited(smp::count);
|
|
|
|
unsigned i;
|
|
for (i = 1; i < smp::count; i++) {
|
|
auto allocation = allocations[i];
|
|
_threads.emplace_back([configuration, hugepages_path, i, allocation] {
|
|
smp::pin(allocation.cpu_id);
|
|
memory::configure(allocation.mem, hugepages_path);
|
|
sigset_t mask;
|
|
sigfillset(&mask);
|
|
auto r = ::sigprocmask(SIG_BLOCK, &mask, NULL);
|
|
throw_system_error_on(r == -1);
|
|
allocate_reactor();
|
|
engine()._id = i;
|
|
start_all_queues();
|
|
inited.wait();
|
|
engine().configure(configuration);
|
|
engine().run();
|
|
});
|
|
}
|
|
|
|
allocate_reactor();
|
|
|
|
#ifdef HAVE_DPDK
|
|
auto it = _threads.begin();
|
|
RTE_LCORE_FOREACH_SLAVE(i) {
|
|
rte_eal_remote_launch(dpdk_thread_adaptor, static_cast<void*>(&*(it++)), i);
|
|
}
|
|
#endif
|
|
|
|
start_all_queues();
|
|
inited.wait();
|
|
engine().configure(configuration);
|
|
engine()._lowres_clock = std::make_unique<lowres_clock>();
|
|
}
|
|
|
|
__thread size_t future_avail_count = 0;
|
|
__thread size_t task_quota = 0;
|
|
|
|
__thread reactor* local_engine;
|
|
|
|
class reactor_notifier_epoll : public reactor_notifier {
|
|
writeable_eventfd _write;
|
|
readable_eventfd _read;
|
|
public:
|
|
reactor_notifier_epoll()
|
|
: _write()
|
|
, _read(_write.read_side()) {
|
|
}
|
|
virtual future<> wait() override {
|
|
// convert _read.wait(), a future<size_t>, to a future<>:
|
|
return _read.wait().then([this] (size_t ignore) {
|
|
return make_ready_future<>();
|
|
});
|
|
}
|
|
virtual void signal() override {
|
|
_write.signal(1);
|
|
}
|
|
};
|
|
|
|
std::unique_ptr<reactor_notifier>
|
|
reactor_backend_epoll::make_reactor_notifier() {
|
|
return std::make_unique<reactor_notifier_epoll>();
|
|
}
|
|
|
|
#ifdef HAVE_OSV
|
|
class reactor_notifier_osv :
|
|
public reactor_notifier, private osv::newpoll::pollable {
|
|
promise<> _pr;
|
|
// TODO: pollable should probably remember its poller, so we shouldn't
|
|
// need to keep another copy of this pointer
|
|
osv::newpoll::poller *_poller = nullptr;
|
|
bool _needed = false;
|
|
public:
|
|
virtual future<> wait() override {
|
|
return engine().notified(this);
|
|
}
|
|
virtual void signal() override {
|
|
wake();
|
|
}
|
|
virtual void on_wake() override {
|
|
_pr.set_value();
|
|
_pr = promise<>();
|
|
// We try to avoid del()/add() ping-pongs: After an one occurance of
|
|
// the event, we don't del() but rather set needed=false. We guess
|
|
// the future's continuation (scheduler by _pr.set_value() above)
|
|
// will make the pollable needed again. Only if we reach this callback
|
|
// a second time, and needed is still false, do we finally del().
|
|
if (!_needed) {
|
|
_poller->del(this);
|
|
_poller = nullptr;
|
|
|
|
}
|
|
_needed = false;
|
|
}
|
|
|
|
void enable(osv::newpoll::poller &poller) {
|
|
_needed = true;
|
|
if (_poller == &poller) {
|
|
return;
|
|
}
|
|
assert(!_poller); // don't put same pollable on multiple pollers!
|
|
_poller = &poller;
|
|
_poller->add(this);
|
|
}
|
|
|
|
virtual ~reactor_notifier_osv() {
|
|
if (_poller) {
|
|
_poller->del(this);
|
|
}
|
|
}
|
|
|
|
friend class reactor_backend_osv;
|
|
};
|
|
|
|
std::unique_ptr<reactor_notifier>
|
|
reactor_backend_osv::make_reactor_notifier() {
|
|
return std::make_unique<reactor_notifier_osv>();
|
|
}
|
|
#endif
|
|
|
|
|
|
#ifdef HAVE_OSV
|
|
reactor_backend_osv::reactor_backend_osv() {
|
|
}
|
|
|
|
bool
|
|
reactor_backend_osv::wait_and_process() {
|
|
_poller.process();
|
|
// osv::poller::process runs pollable's callbacks, but does not currently
|
|
// have a timer expiration callback - instead if gives us an expired()
|
|
// function we need to check:
|
|
if (_poller.expired()) {
|
|
_timer_promise.set_value();
|
|
_timer_promise = promise<>();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
future<>
|
|
reactor_backend_osv::notified(reactor_notifier *notifier) {
|
|
// reactor_backend_osv::make_reactor_notifier() generates a
|
|
// reactor_notifier_osv, so we only can work on such notifiers.
|
|
reactor_notifier_osv *n = dynamic_cast<reactor_notifier_osv *>(notifier);
|
|
if (n->read()) {
|
|
return make_ready_future<>();
|
|
}
|
|
n->enable(_poller);
|
|
return n->_pr.get_future();
|
|
}
|
|
|
|
|
|
future<>
|
|
reactor_backend_osv::readable(pollable_fd_state& fd) {
|
|
std::cout << "reactor_backend_osv does not support file descriptors - readable() shouldn't have been called!\n";
|
|
abort();
|
|
}
|
|
|
|
future<>
|
|
reactor_backend_osv::writeable(pollable_fd_state& fd) {
|
|
std::cout << "reactor_backend_osv does not support file descriptors - writeable() shouldn't have been called!\n";
|
|
abort();
|
|
}
|
|
|
|
void
|
|
reactor_backend_osv::forget(pollable_fd_state& fd) {
|
|
std::cout << "reactor_backend_osv does not support file descriptors - forget() shouldn't have been called!\n";
|
|
abort();
|
|
}
|
|
|
|
void
|
|
reactor_backend_osv::enable_timer(clock_type::time_point when) {
|
|
_poller.set_timer(when);
|
|
}
|
|
|
|
#endif
|
|
|
|
void report_exception(sstring message, std::exception_ptr eptr) {
|
|
#ifndef __GNUC__
|
|
std::cerr << message << ".\n";
|
|
#else
|
|
try {
|
|
std::rethrow_exception(eptr);
|
|
} catch(...) {
|
|
auto tp = abi::__cxa_current_exception_type();
|
|
std::cerr << message;
|
|
if (tp) {
|
|
int status;
|
|
char *demangled = abi::__cxa_demangle(tp->name(), 0, 0, &status);
|
|
std::cerr << " of type '";
|
|
if (status == 0) {
|
|
std::cerr << demangled;
|
|
free(demangled);
|
|
} else {
|
|
std::cerr << tp->name();
|
|
}
|
|
std::cerr << "'";
|
|
} else {
|
|
std::cerr << " of unknown type";
|
|
}
|
|
// Print more information on some known exception types
|
|
try {
|
|
throw;
|
|
} catch(const std::system_error &e) {
|
|
std::cerr << ": Error " << e.code() << " (" << e.code().message() << ")\n";
|
|
} catch(const std::exception& e) {
|
|
std::cerr << ": " << e.what() << "\n";
|
|
} catch(...) {
|
|
std::cerr << ".\n";
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* engine_exit() exits the reactor. It should be given a pointer to the
|
|
* exception which prompted this exit - or a null pointer if the exit
|
|
* request was not caused by any exception.
|
|
*/
|
|
void engine_exit(std::exception_ptr eptr) {
|
|
if (!eptr) {
|
|
engine().exit(0);
|
|
return;
|
|
}
|
|
report_exception("Exiting on unhandled exception", eptr);
|
|
engine().exit(1);
|
|
}
|
|
|
|
void report_failed_future(std::exception_ptr eptr) {
|
|
report_exception("WARNING: exceptional future ignored", eptr);
|
|
}
|
|
|
|
|
|
future<file> open_file_dma(sstring name, open_flags flags) {
|
|
return engine().open_file_dma(std::move(name), flags);
|
|
}
|
|
|
|
future<file> open_directory(sstring name) {
|
|
return engine().open_directory(std::move(name));
|
|
}
|
|
|
|
future<> make_directory(sstring name) {
|
|
return engine().make_directory(std::move(name));
|
|
}
|
|
|
|
future<> touch_directory(sstring name) {
|
|
return make_directory(name).then_wrapped([] (future<> f) {
|
|
try {
|
|
f.get();
|
|
} catch (std::system_error& e) {
|
|
if (e.code() != std::error_code(EEXIST, std::system_category())) {
|
|
throw;
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
/// \cond internal
|
|
future<> do_flush_directory(sstring name) {
|
|
if (name.empty()) {
|
|
return make_ready_future<>();
|
|
}
|
|
|
|
return open_directory(name).then([] (file f) {
|
|
return f.flush().then([f] () mutable {
|
|
return f.close();
|
|
});
|
|
});
|
|
}
|
|
|
|
future<> do_recursive_touch_directory(sstring base, sstring name) {
|
|
static const sstring::value_type separator = '/';
|
|
|
|
if (name.empty()) {
|
|
return make_ready_future<>();
|
|
}
|
|
|
|
size_t pos = std::min(name.find(separator), name.size() - 1);
|
|
base += name.substr(0 , pos + 1);
|
|
name = name.substr(pos + 1);
|
|
return touch_directory(base).then([base, name] {
|
|
return do_recursive_touch_directory(base, name);
|
|
}).then([base] {
|
|
// We will now flush the directory that holds the entry we potentially
|
|
// created. Technically speaking, we only need to touch when we did
|
|
// create. But flushing the unchanged ones should be cheap enough - and
|
|
// it simplifies the code considerably.
|
|
return do_flush_directory(base);
|
|
});
|
|
}
|
|
/// \endcond
|
|
|
|
future<> recursive_touch_directory(sstring name) {
|
|
// If the name is empty, it will be of the type a/b/c, which should be interpreted as
|
|
// a relative path. This means we have to flush our current directory
|
|
sstring base = "";
|
|
if (name[0] == '/' || name[0] == '.') {
|
|
base = "./";
|
|
}
|
|
return do_recursive_touch_directory(base, name);
|
|
}
|
|
|
|
future<> remove_file(sstring pathname) {
|
|
return engine().remove_file(std::move(pathname));
|
|
}
|
|
|
|
future<> rename_file(sstring old_pathname, sstring new_pathname) {
|
|
return engine().rename_file(std::move(old_pathname), std::move(new_pathname));
|
|
}
|
|
|
|
server_socket listen(socket_address sa) {
|
|
return engine().listen(sa);
|
|
}
|
|
|
|
server_socket listen(socket_address sa, listen_options opts) {
|
|
return engine().listen(sa, opts);
|
|
}
|
|
|
|
future<connected_socket> connect(socket_address sa) {
|
|
return engine().connect(sa);
|
|
}
|