/* * Copyright (C) 2014 Cloudius Systems, Ltd. */ #include #include #include "core/posix.hh" #include "core/vla.hh" #include "core/reactor.hh" #include "core/future-util.hh" #include "core/stream.hh" #include "core/circular_buffer.hh" #include "core/align.hh" #include #include #include #include #include #include "ip.hh" #include "net/native-stack.hh" #include #include #include #include "core/xen/xenstore.hh" #include "core/xen/evtchn.hh" #include "xenfront.hh" #include using namespace net; namespace xen { using phys = uint64_t; class xenfront_device : public device { public: xenstore* _xenstore = xenstore::instance(); private: net::hw_features _hw_features; ethernet_address _hw_address; std::string _device_str; public: bool _userspace; public: xenfront_device(boost::program_options::variables_map opts, bool userspace) : _hw_address(net::parse_ethernet_address(_xenstore->read(path("mac")))) , _device_str("device/vif/" + std::to_string(opts["vif"].as())) , _userspace(userspace) { _hw_features.rx_csum_offload = true; _hw_features.tx_csum_l4_offload = true; } std::string path(std::string s) { return _device_str + "/" + s; } ethernet_address hw_address() override { return _hw_address; } net::hw_features hw_features() override { return _hw_features; } virtual std::unique_ptr init_local_queue(boost::program_options::variables_map opts, uint16_t qid) override; }; class xenfront_qp : public net::qp { private: xenfront_device* _dev; unsigned _otherend; std::string _backend; gntalloc *_gntalloc; evtchn *_evtchn; port _tx_evtchn; port _rx_evtchn; front_ring _tx_ring; front_ring _rx_ring; grant_head *_tx_refs; grant_head *_rx_refs; std::unordered_map _features; static std::unordered_map _supported_features; port bind_tx_evtchn(bool split); port bind_rx_evtchn(bool split); future<> alloc_rx_references(); future<> handle_tx_completions(); future<> queue_rx_packet(); void alloc_one_rx_reference(unsigned id); std::string path(std::string s) { return _dev->path(s); } public: explicit xenfront_qp(xenfront_device* dev, boost::program_options::variables_map opts); ~xenfront_qp(); virtual void rx_start() override; virtual future<> send(packet p) override; }; std::unordered_map xenfront_qp::_supported_features = { { "feature-split-event-channels", "feature-split-event-channels" }, { "feature-rx-copy", "request-rx-copy" } }; void xenfront_qp::rx_start() { keep_doing([this] { return _rx_evtchn.pending().then([this] { return queue_rx_packet(); }); }); } future<> xenfront_qp::send(packet _p) { uint32_t frag = 0; // There doesn't seem to be a way to tell xen, when using the userspace // drivers, to map a particular page. Therefore, the only alternative // here is to copy. All pages shared must come from the gntalloc mmap. // // A better solution could be to change the packet allocation path to // use a pre-determined page for data. // // In-kernel should be fine // FIXME: negotiate and use scatter/gather _p.linearize(); return _tx_ring.entries.has_room().then([this, p = std::move(_p), frag] () mutable { auto req_prod = _tx_ring._sring->req_prod; auto f = p.frag(frag); auto ref = _tx_refs->new_ref(f.base, f.size); unsigned idx = _tx_ring.entries.get_index(); assert(!_tx_ring.entries[idx]); _tx_ring.entries[idx] = ref; auto req = &_tx_ring._sring->_ring[idx].req; req->gref = ref.xen_id; req->offset = 0; req->flags = {}; if (p.offload_info().protocol != ip_protocol_num::unused) { req->flags.csum_blank = true; req->flags.data_validated = true; } else { req->flags.data_validated = true; } req->id = idx; req->size = f.size; _tx_ring.req_prod_pvt = idx; _tx_ring._sring->req_prod = req_prod + 1; _tx_ring._sring->req_event++; if ((frag + 1) == p.nr_frags()) { _tx_evtchn.notify(); return make_ready_future<>(); } else { return make_ready_future<>(); } }); // FIXME: Don't forget to clear all grant refs when frontend closes. Or is it automatic? } #define rmb() asm volatile("lfence":::"memory"); #define wmb() asm volatile("":::"memory"); template future<> front_ring::entries::has_room() { return _available.wait(); } template void front_ring::entries::free_index(unsigned id) { _available.signal(); } template unsigned front_ring::entries::get_index() { return front_ring::idx(_next_idx++); } template future<> front_ring::process_ring(std::function func, grant_head *refs) { auto prod = _sring->rsp_prod; rmb(); for (unsigned i = rsp_cons; i != prod; i++) { auto el = _sring->_ring[idx(i)]; if (el.rsp.status < 0) { dump("Packet error", el.rsp); continue; } auto& entry = entries[i]; if (!func(entry, el)) { continue; } assert(entry.xen_id >= 0); refs->free_ref(entry); entries.free_index(i); prod = _sring->rsp_prod; } rsp_cons = prod; _sring->rsp_event = prod + 1; return make_ready_future<>(); } future<> xenfront_qp::queue_rx_packet() { uint64_t bunch; return _rx_ring.process_ring([this, &bunch] (gntref &entry, rx &rx) mutable { packet p(static_cast(entry.page) + rx.rsp.offset, rx.rsp.status); _dev->l2receive(std::move(p)); bunch++; return true; }, _rx_refs); update_rx_count(bunch); } void xenfront_qp::alloc_one_rx_reference(unsigned index) { _rx_ring.entries[index] = _rx_refs->new_ref(); // This is how the backend knows where to put data. auto req = &_rx_ring._sring->_ring[index].req; req->id = index; req->gref = _rx_ring.entries[index].xen_id; } future<> xenfront_qp::alloc_rx_references() { return _rx_ring.entries.has_room().then([this] () { unsigned i = _rx_ring.entries.get_index(); auto req_prod = _rx_ring.req_prod_pvt; alloc_one_rx_reference(i); ++req_prod; _rx_ring.req_prod_pvt = req_prod; wmb(); _rx_ring._sring->req_prod = req_prod; /* ready */ _rx_evtchn.notify(); }); } future<> xenfront_qp::handle_tx_completions() { return _tx_ring.process_ring([this] (gntref &entry, tx &tx) { if (tx.rsp.status == 1) { return false; } if (tx.rsp.status != 0) { _tx_ring.dump("TX positive packet error", tx.rsp); return false; } return true; }, _tx_refs); } port xenfront_qp::bind_tx_evtchn(bool split) { return _evtchn->bind(); } port xenfront_qp::bind_rx_evtchn(bool split) { if (split) { return _evtchn->bind(); } return _evtchn->bind(_tx_evtchn.number()); } xenfront_qp::xenfront_qp(xenfront_device* dev, boost::program_options::variables_map opts) : _dev(dev) , _otherend(_dev->_xenstore->read(path("backend-id"))) , _backend(_dev->_xenstore->read(path("backend"))) , _gntalloc(gntalloc::instance(_dev->_userspace, _otherend)) , _evtchn(evtchn::instance(_dev->_userspace, _otherend)) , _tx_ring(_gntalloc->alloc_ref()) , _rx_ring(_gntalloc->alloc_ref()) , _tx_refs(_gntalloc->alloc_ref(front_ring::nr_ents)) , _rx_refs(_gntalloc->alloc_ref(front_ring::nr_ents)) { auto all_features = _dev->_xenstore->ls(_backend); for (auto&& feat : all_features) { if (feat.compare(0, 8, "feature-") == 0) { auto val = _dev->_xenstore->read(_backend + "/" + feat); try { auto key = _supported_features.at(feat); _features[key] = val; } catch (const std::out_of_range& oor) { _features[feat] = 0; } } } if (!opts["split-event-channels"].as()) { _features["feature-split-event-channels"] = 0; } bool split = _features["feature-split-event-channels"]; _tx_evtchn = bind_tx_evtchn(split); _rx_evtchn = bind_rx_evtchn(split); { auto t = xenstore::xenstore_transaction(); for (auto&& f: _features) { _dev->_xenstore->write(path(f.first), f.second, t); } if (split) { _dev->_xenstore->write(path("event-channel-tx"), _tx_evtchn.number(), t); _dev->_xenstore->write(path("event-channel-rx"), _rx_evtchn.number(), t); } else { _dev->_xenstore->write(path("event-channel"), _rx_evtchn.number(), t); } _dev->_xenstore->write(path("tx-ring-ref"), _tx_ring.ref, t); _dev->_xenstore->write(path("rx-ring-ref"), _rx_ring.ref, t); _dev->_xenstore->write(path("state"), 4, t); } keep_doing([this] { return alloc_rx_references(); }); _rx_evtchn.umask(); keep_doing([this] () { return _tx_evtchn.pending().then([this] { handle_tx_completions(); }); }); _tx_evtchn.umask(); } xenfront_qp::~xenfront_qp() { { auto t = xenstore::xenstore_transaction(); for (auto& f: _features) { _dev->_xenstore->remove(path(f.first), t); } _dev->_xenstore->remove(path("event-channel-tx"), t); _dev->_xenstore->remove(path("event-channel-rx"), t); _dev->_xenstore->remove(path("event-channel"), t); _dev->_xenstore->remove(path("tx-ring-ref"), t); _dev->_xenstore->remove(path("rx-ring-ref"), t); _dev->_xenstore->write(path("state"), 6, t); } _dev->_xenstore->write(path("state"), 1); } boost::program_options::options_description get_xenfront_net_options_description() { boost::program_options::options_description opts( "xenfront net options"); opts.add_options() ("vif", boost::program_options::value()->default_value(0), "vif number to hijack") ("split-event-channels", boost::program_options::value()->default_value(true), "Split event channel support") ; return opts; } std::unique_ptr xenfront_device::init_local_queue(boost::program_options::variables_map opts, uint16_t qid) { assert(!qid); return std::make_unique(this, opts); } std::unique_ptr create_xenfront_net_device(boost::program_options::variables_map opts, bool userspace) { static bool called = false; assert(!called); called = true; return std::make_unique(opts, userspace); } }