410 lines
11 KiB
C++
410 lines
11 KiB
C++
/*
|
|
* Copyright (C) 2014 Cloudius Systems, Ltd.
|
|
*/
|
|
|
|
#include <boost/lexical_cast.hpp>
|
|
#include <boost/algorithm/string.hpp>
|
|
#include "core/posix.hh"
|
|
#include "core/vla.hh"
|
|
#include "core/reactor.hh"
|
|
#include "core/future-util.hh"
|
|
#include "core/stream.hh"
|
|
#include "core/circular_buffer.hh"
|
|
#include "core/align.hh"
|
|
#include <atomic>
|
|
#include <list>
|
|
#include <queue>
|
|
#include <fcntl.h>
|
|
#include <linux/if_tun.h>
|
|
#include "ip.hh"
|
|
#include "net/native-stack.hh"
|
|
|
|
#include <xen/xen.h>
|
|
|
|
#include <xen/memory.h>
|
|
#include <xen/sys/gntalloc.h>
|
|
|
|
#include "core/xen/xenstore.hh"
|
|
#include "core/xen/evtchn.hh"
|
|
|
|
#include "xenfront.hh"
|
|
#include <unordered_set>
|
|
|
|
using namespace net;
|
|
|
|
namespace xen {
|
|
|
|
using phys = uint64_t;
|
|
|
|
class xenfront_device : public device {
|
|
public:
|
|
xenstore* _xenstore = xenstore::instance();
|
|
private:
|
|
net::hw_features _hw_features;
|
|
ethernet_address _hw_address;
|
|
std::string _device_str;
|
|
|
|
public:
|
|
bool _userspace;
|
|
|
|
public:
|
|
xenfront_device(boost::program_options::variables_map opts, bool userspace)
|
|
: _hw_address(net::parse_ethernet_address(_xenstore->read(path("mac"))))
|
|
, _device_str("device/vif/" + std::to_string(opts["vif"].as<unsigned>()))
|
|
, _userspace(userspace) {
|
|
_hw_features.rx_csum_offload = true;
|
|
_hw_features.tx_csum_l4_offload = true;
|
|
}
|
|
|
|
std::string path(std::string s) { return _device_str + "/" + s; }
|
|
|
|
ethernet_address hw_address() override {
|
|
return _hw_address;
|
|
}
|
|
net::hw_features hw_features() override {
|
|
return _hw_features;
|
|
}
|
|
virtual std::unique_ptr<qp> init_local_queue(boost::program_options::variables_map opts, uint16_t qid) override;
|
|
};
|
|
|
|
class xenfront_qp : public net::qp {
|
|
private:
|
|
xenfront_device* _dev;
|
|
|
|
unsigned _otherend;
|
|
std::string _backend;
|
|
gntalloc *_gntalloc;
|
|
evtchn *_evtchn;
|
|
port _tx_evtchn;
|
|
port _rx_evtchn;
|
|
|
|
front_ring<tx> _tx_ring;
|
|
front_ring<rx> _rx_ring;
|
|
|
|
grant_head *_tx_refs;
|
|
grant_head *_rx_refs;
|
|
|
|
std::unordered_map<std::string, int> _features;
|
|
static std::unordered_map<std::string, std::string> _supported_features;
|
|
|
|
port bind_tx_evtchn(bool split);
|
|
port bind_rx_evtchn(bool split);
|
|
|
|
future<> alloc_rx_references();
|
|
future<> handle_tx_completions();
|
|
future<> queue_rx_packet();
|
|
|
|
void alloc_one_rx_reference(unsigned id);
|
|
std::string path(std::string s) { return _dev->path(s); }
|
|
|
|
public:
|
|
explicit xenfront_qp(xenfront_device* dev, boost::program_options::variables_map opts);
|
|
~xenfront_qp();
|
|
virtual void rx_start() override;
|
|
virtual future<> send(packet p) override;
|
|
};
|
|
|
|
std::unordered_map<std::string, std::string>
|
|
xenfront_qp::_supported_features = {
|
|
{ "feature-split-event-channels", "feature-split-event-channels" },
|
|
{ "feature-rx-copy", "request-rx-copy" }
|
|
};
|
|
|
|
void
|
|
xenfront_qp::rx_start() {
|
|
keep_doing([this] {
|
|
return _rx_evtchn.pending().then([this] {
|
|
return queue_rx_packet();
|
|
});
|
|
});
|
|
}
|
|
|
|
// Queues one packet on the TX ring.
//
// The packet is linearized first, then, once the ring reports room, fragment 0
// is granted to the backend, described in the next ring slot, and the
// backend is notified through the TX event channel.
future<> xenfront_qp::send(packet _p) {
    // With the userspace drivers there seems to be no way to ask Xen to map
    // a particular page, so copying is the only option: every shared page has
    // to come from the gntalloc mmap.
    //
    // Changing the packet allocation path to use a pre-determined page for
    // data could be a better solution.
    //
    // In-kernel should be fine.

    // FIXME: negotiate and use scatter/gather
    _p.linearize();

    return _tx_ring.entries.has_room().then(
            [this, p = std::move(_p), frag = uint32_t(0)] () mutable {
        // Snapshot the shared producer index before touching the ring.
        auto req_prod = _tx_ring._sring->req_prod;

        auto f = p.frag(frag);
        auto ref = _tx_refs->new_ref(f.base, f.size);

        unsigned idx = _tx_ring.entries.get_index();
        assert(!_tx_ring.entries[idx]);
        _tx_ring.entries[idx] = ref;

        // Fill in the request descriptor for this slot.
        auto& req = _tx_ring._sring->_ring[idx].req;
        req.gref = ref.xen_id;
        req.offset = 0;
        req.flags = {};
        if (p.offload_info().protocol != ip_protocol_num::unused) {
            req.flags.csum_blank = true;
        }
        req.flags.data_validated = true;
        req.id = idx;
        req.size = f.size;

        // Publish the request.
        _tx_ring.req_prod_pvt = idx;
        _tx_ring._sring->req_prod = req_prod + 1;
        _tx_ring._sring->req_event++;

        // Only kick the backend after the last fragment has been queued.
        if ((frag + 1) == p.nr_frags()) {
            _tx_evtchn.notify();
        }
        return make_ready_future<>();
    });

    // FIXME: Don't forget to clear all grant refs when frontend closes. Or is it automatic?
}
|
|
|
|
// Memory barrier helpers used around shared-ring index accesses.
//
// rmb(): emits an `lfence` instruction and clobbers "memory".
// wmb(): emits no instruction; the "memory" clobber only acts as a compiler
//        barrier.
//
// The macros deliberately do NOT end in a semicolon: the caller supplies it
// (`wmb();`). A semicolon inside the macro would expand to two statements and
// break constructs such as `if (c) wmb(); else ...`.
#define rmb() asm volatile("lfence":::"memory")
#define wmb() asm volatile("":::"memory")
|
|
|
|
template <typename T>
|
|
future<> front_ring<T>::entries::has_room() {
|
|
return _available.wait();
|
|
}
|
|
|
|
template <typename T>
|
|
void front_ring<T>::entries::free_index(unsigned id) {
|
|
_available.signal();
|
|
}
|
|
|
|
template <typename T>
|
|
unsigned front_ring<T>::entries::get_index() {
|
|
return front_ring<T>::idx(_next_idx++);
|
|
}
|
|
|
|
template <typename T>
|
|
future<> front_ring<T>::process_ring(std::function<bool (gntref &entry, T& el)> func, grant_head *refs)
|
|
{
|
|
auto prod = _sring->rsp_prod;
|
|
rmb();
|
|
|
|
for (unsigned i = rsp_cons; i != prod; i++) {
|
|
auto el = _sring->_ring[idx(i)];
|
|
|
|
if (el.rsp.status < 0) {
|
|
dump("Packet error", el.rsp);
|
|
continue;
|
|
}
|
|
|
|
auto& entry = entries[i];
|
|
|
|
if (!func(entry, el)) {
|
|
continue;
|
|
}
|
|
|
|
assert(entry.xen_id >= 0);
|
|
|
|
refs->free_ref(entry);
|
|
entries.free_index(i);
|
|
|
|
prod = _sring->rsp_prod;
|
|
}
|
|
rsp_cons = prod;
|
|
_sring->rsp_event = prod + 1;
|
|
|
|
return make_ready_future<>();
|
|
}
|
|
|
|
// Drains the RX ring: every completed response is wrapped in a packet and
// handed to the device via l2receive(); the number of packets delivered in
// this pass is then reported through update_rx_count().
//
// Returns the future produced by front_ring::process_ring().
future<> xenfront_qp::queue_rx_packet()
{
    // Must start at zero: it is incremented once per delivered packet.
    uint64_t bunch = 0;

    // `bunch` is captured by reference. This is safe because process_ring()
    // runs the callback synchronously inside its loop and returns an already
    // ready future, so the lambda never outlives this stack frame.
    auto f = _rx_ring.process_ring([this, &bunch] (gntref &entry, rx &rx) mutable {
        packet p(static_cast<char *>(entry.page) + rx.rsp.offset, rx.rsp.status);
        _dev->l2receive(std::move(p));
        bunch++;
        return true;
    }, _rx_refs);

    // Account for the batch before returning (this used to sit after the
    // return statement and was never executed).
    update_rx_count(bunch);
    return f;
}
|
|
|
|
// Attaches a fresh grant reference to RX slot `index` and fills in the
// matching request descriptor of the shared ring.
void xenfront_qp::alloc_one_rx_reference(unsigned index) {
    auto& slot = _rx_ring.entries[index];
    slot = _rx_refs->new_ref();

    // The request tells the backend where to put data.
    auto& req = _rx_ring._sring->_ring[index].req;
    req.id = index;
    req.gref = slot.xen_id;
}
|
|
|
|
// Posts one receive buffer: waits for a free RX slot, grants a page for it,
// advances the producer index (private copy first, then the shared one after
// a write barrier) and notifies the backend through the RX event channel.
future<> xenfront_qp::alloc_rx_references() {
    return _rx_ring.entries.has_room().then([this] () {
        const unsigned slot = _rx_ring.entries.get_index();

        auto produced = _rx_ring.req_prod_pvt;
        alloc_one_rx_reference(slot);
        produced++;
        _rx_ring.req_prod_pvt = produced;

        // The request must be complete before the new index is published.
        wmb();
        _rx_ring._sring->req_prod = produced;

        /* ready */
        _rx_evtchn.notify();
    });
}
|
|
|
|
// Walks the TX ring's responses. A response with status 0 is accepted
// (process_ring() then releases its grant reference and slot); status 1 is
// skipped silently; any other status reaching this callback is dumped and
// skipped.
future<> xenfront_qp::handle_tx_completions() {
    return _tx_ring.process_ring([this] (gntref &entry, tx &tx) {
        if (tx.rsp.status == 0) {
            return true;
        }
        if (tx.rsp.status != 1) {
            _tx_ring.dump("TX positive packet error", tx.rsp);
        }
        return false;
    }, _tx_refs);
}
|
|
|
|
// Binds the TX event channel. A new port is bound regardless of `split`;
// the parameter is currently not used.
port xenfront_qp::bind_tx_evtchn(bool split)
{
    return _evtchn->bind();
}
|
|
|
|
// Binds the RX event channel. Without split channels, bind() is called with
// the TX channel's number; with split channels a separate port is bound.
// Relies on _tx_evtchn having been bound already.
port xenfront_qp::bind_rx_evtchn(bool split)
{
    if (!split) {
        return _evtchn->bind(_tx_evtchn.number());
    }
    return _evtchn->bind();
}
|
|
|
|
// Sets up the queue pair:
//   1. reads the backend id and backend path from xenstore and allocates the
//      rings and grant-reference pools (member initializers);
//   2. collects the backend's "feature-*" keys into _features;
//   3. binds the event channels (split or shared);
//   4. writes features, event channels, ring references and the new state
//      under the device's xenstore subtree within one transaction object;
//   5. starts the RX-buffer refill loop and the TX-completion loop.
xenfront_qp::xenfront_qp(xenfront_device* dev, boost::program_options::variables_map opts)
    : _dev(dev)
    , _otherend(_dev->_xenstore->read<int>(path("backend-id")))
    , _backend(_dev->_xenstore->read(path("backend")))
    , _gntalloc(gntalloc::instance(_dev->_userspace, _otherend))
    , _evtchn(evtchn::instance(_dev->_userspace, _otherend))
    , _tx_ring(_gntalloc->alloc_ref())
    , _rx_ring(_gntalloc->alloc_ref())
    , _tx_refs(_gntalloc->alloc_ref(front_ring<tx>::nr_ents))
    , _rx_refs(_gntalloc->alloc_ref(front_ring<rx>::nr_ents))
{
    auto store = _dev->_xenstore;

    // Feature discovery: supported features are recorded under their mapped
    // key with the backend's value, everything else is recorded as 0.
    auto all_features = store->ls(_backend);
    for (auto&& feat : all_features) {
        if (feat.compare(0, 8, "feature-") != 0) {
            continue;
        }
        auto val = store->read<int>(_backend + "/" + feat);
        auto known = _supported_features.find(feat);
        if (known != _supported_features.end()) {
            _features[known->second] = val;
        } else {
            _features[feat] = 0;
        }
    }

    // The command line may veto split event channels.
    if (!opts["split-event-channels"].as<bool>()) {
        _features["feature-split-event-channels"] = 0;
    }
    bool split = _features["feature-split-event-channels"];

    // TX must be bound first: the non-split RX binding reuses its number.
    _tx_evtchn = bind_tx_evtchn(split);
    _rx_evtchn = bind_rx_evtchn(split);

    {
        auto t = xenstore::xenstore_transaction();

        for (auto&& f : _features) {
            store->write(path(f.first), f.second, t);
        }

        if (split) {
            store->write<int>(path("event-channel-tx"), _tx_evtchn.number(), t);
            store->write<int>(path("event-channel-rx"), _rx_evtchn.number(), t);
        } else {
            store->write<int>(path("event-channel"), _rx_evtchn.number(), t);
        }
        store->write<int>(path("tx-ring-ref"), _tx_ring.ref, t);
        store->write<int>(path("rx-ring-ref"), _rx_ring.ref, t);
        // NOTE(review): 4 is presumably the xenbus "connected" state -- confirm.
        store->write<int>(path("state"), 4, t);
    }

    // Keep posting receive buffers whenever the RX ring has room.
    keep_doing([this] {
        return alloc_rx_references();
    });
    _rx_evtchn.umask();

    // Reap TX completions each time the TX event channel fires.
    keep_doing([this] () {
        return _tx_evtchn.pending().then([this] {
            handle_tx_completions();
        });
    });
    _tx_evtchn.umask();
}
|
|
|
|
// Tears down the xenstore state created by the constructor: removes the
// feature keys, event-channel keys and ring references and writes state 6
// using one transaction object, then writes state 1 outside of it.
xenfront_qp::~xenfront_qp()
{
    auto store = _dev->_xenstore;

    {
        auto t = xenstore::xenstore_transaction();

        for (auto& f : _features) {
            store->remove(path(f.first), t);
        }

        for (const char* key : { "event-channel-tx",
                                 "event-channel-rx",
                                 "event-channel",
                                 "tx-ring-ref",
                                 "rx-ring-ref" }) {
            store->remove(path(key), t);
        }
        // NOTE(review): 6 is presumably the xenbus "closed" state -- confirm.
        store->write<int>(path("state"), 6, t);
    }

    // NOTE(review): 1 is presumably the xenbus "initialising" state -- confirm.
    store->write<int>(path("state"), 1);
}
|
|
|
|
// Builds the command-line option group of the xenfront network device:
//   --vif                   (unsigned, default 0)
//   --split-event-channels  (bool, default true)
boost::program_options::options_description
get_xenfront_net_options_description()
{
    namespace bpo = boost::program_options;

    bpo::options_description opts("xenfront net options");
    opts.add_options()
        ("vif",
            bpo::value<unsigned>()->default_value(0),
            "vif number to hijack")
        ("split-event-channels",
            bpo::value<bool>()->default_value(true),
            "Split event channel support")
        ;
    return opts;
}
|
|
|
|
// Creates the device's queue pair. Only queue id 0 is accepted (asserted).
std::unique_ptr<qp> xenfront_device::init_local_queue(boost::program_options::variables_map opts, uint16_t qid)
{
    assert(qid == 0);
    return std::make_unique<xenfront_qp>(this, opts);
}
|
|
|
|
// Factory for the xenfront network device. It may be called only once per
// process; a second call trips the assertion.
std::unique_ptr<net::device> create_xenfront_net_device(boost::program_options::variables_map opts, bool userspace)
{
    static bool already_created = false;
    assert(!already_created);
    already_created = true;

    return std::make_unique<xenfront_device>(opts, userspace);
}
|
|
|
|
}
|