From d14da53171bb6c3d2966c1241c3cd24e08f0427a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 14 Dec 2014 10:41:08 +0200 Subject: [PATCH 1/8] virtio: move into 'namespace virtio' --- net/virtio.cc | 102 +++++++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 47 deletions(-) diff --git a/net/virtio.cc b/net/virtio.cc index 859f43152d..9cd35d33c7 100644 --- a/net/virtio.cc +++ b/net/virtio.cc @@ -30,7 +30,9 @@ using namespace net; -class virtio_device : public device { +namespace virtio { + +class device : public net::device { private: boost::program_options::variables_map _opts; net::hw_features _hw_features; @@ -71,7 +73,7 @@ private: } public: - virtio_device(boost::program_options::variables_map opts) + device(boost::program_options::variables_map opts) : _opts(opts), _features(setup_features()) {} ethernet_address hw_address() override { @@ -94,7 +96,7 @@ public: * (where both notifications occur through eventfds) and one for an assigned * virtio device from OSv. */ -class virtio_notifier { +class notifier { public: // Notify the host virtual void notify() = 0; @@ -107,11 +109,11 @@ public: virtual void wake_wait() { abort(); } - virtual ~virtio_notifier() { + virtual ~notifier() { } }; -class virtio_notifier_vhost : public virtio_notifier { +class notifier_vhost : public notifier { private: readable_eventfd _notified; writeable_eventfd _kick; @@ -125,14 +127,14 @@ public: return make_ready_future<>(); }); } - virtio_notifier_vhost(readable_eventfd &¬ified, + notifier_vhost(readable_eventfd &¬ified, writeable_eventfd &&kick) : _notified(std::move(notified)) , _kick(std::move(kick)) {} }; #ifdef HAVE_OSV -class virtio_notifier_osv : public virtio_notifier { +class notifier_osv : public notifier { private: std::unique_ptr _notified; uint16_t _q_index; @@ -147,7 +149,7 @@ public: virtual void wake_wait() override { _notified->signal(); } - virtio_notifier_osv(osv::assigned_virtio &virtio, uint16_t q_index) + notifier_osv(osv::assigned_virtio &virtio, uint16_t q_index) : _notified(engine.make_reactor_notifier()) , _q_index(q_index) , _virtio(virtio) @@ -260,7 +262,7 @@ private: }; private: config _config; - std::unique_ptr _notifier; + std::unique_ptr _notifier; std::unique_ptr[]> _completions; desc* _descs; avail _avail; @@ -276,7 +278,7 @@ private: public: explicit vring(config conf, bool poll_mode); - void set_notifier(std::unique_ptr notifier) { + void set_notifier(std::unique_ptr notifier) { _notifier = std::move(notifier); } const config& getconfig() { @@ -509,7 +511,7 @@ void vring::complete() { }); } -class virtio_qp : public net::qp { +class qp : public net::qp { protected: struct net_hdr { uint8_t needs_csum : 1; @@ -525,11 +527,11 @@ protected: uint16_t num_buffers; }; class txq { - virtio_qp& _dev; + qp& _dev; vring _ring; public: - txq(virtio_qp& dev, vring::config config, bool poll_mode); - void set_notifier(std::unique_ptr notifier) { + txq(qp& dev, vring::config config, bool poll_mode); + void set_notifier(std::unique_ptr notifier) { _ring.set_notifier(std::move(notifier)); } const vring::config& getconfig() { @@ -542,14 +544,14 @@ protected: future<> post(packet p); }; class rxq { - virtio_qp& _dev; + qp& _dev; vring _ring; unsigned _remaining_buffers = 0; std::vector _fragments; std::vector> _deleters; public: - rxq(virtio_qp& _if, vring::config config, bool poll_mode); - void set_notifier(std::unique_ptr notifier) { + rxq(qp& _if, vring::config config, bool poll_mode); + void set_notifier(std::unique_ptr notifier) { _ring.set_notifier(std::move(notifier)); } const vring::config& getconfig() { @@ -566,7 +568,7 @@ protected: future<> prepare_buffers(); }; protected: - virtio_device* _dev; + device* _dev; size_t _header_len; std::unique_ptr _txq_storage; std::unique_ptr _rxq_storage; @@ -578,7 +580,7 @@ protected: void common_config(vring::config& r); size_t vring_storage_size(size_t ring_size); public: - explicit virtio_qp(virtio_device* dev, size_t rx_ring_size, size_t tx_ring_size, bool poll_mode); + explicit qp(device* dev, size_t rx_ring_size, size_t tx_ring_size, bool poll_mode); virtual future<> send(packet p) override; virtual void rx_start() override; virtual phys virt_to_phys(void* p) { @@ -586,12 +588,12 @@ public: } }; -virtio_qp::txq::txq(virtio_qp& dev, vring::config config, bool poll_mode) +qp::txq::txq(qp& dev, vring::config config, bool poll_mode) : _dev(dev), _ring(config, poll_mode) { } future<> -virtio_qp::txq::post(packet p) { +qp::txq::post(packet p) { net_hdr_mrg vhdr = {}; // Handle TCP checksum offload @@ -664,12 +666,12 @@ virtio_qp::txq::post(packet p) { }); } -virtio_qp::rxq::rxq(virtio_qp& dev, vring::config config, bool poll_mode) +qp::rxq::rxq(qp& dev, vring::config config, bool poll_mode) : _dev(dev), _ring(config, poll_mode) { } future<> -virtio_qp::rxq::prepare_buffers() { +qp::rxq::prepare_buffers() { auto& available = _ring.available_descriptors(); return available.wait(1).then([this, &available] { unsigned count = 1; @@ -738,7 +740,7 @@ static std::unique_ptr virtio_buffer(size_t size) { return std::unique_ptr(reinterpret_cast(ret)); } -virtio_qp::virtio_qp(virtio_device* dev, size_t rx_ring_size, size_t tx_ring_size, bool poll_mode) +qp::qp(device* dev, size_t rx_ring_size, size_t tx_ring_size, bool poll_mode) : _dev(dev) , _txq_storage(virtio_buffer(vring_storage_size(tx_ring_size))) , _rxq_storage(virtio_buffer(vring_storage_size(rx_ring_size))) @@ -746,19 +748,19 @@ virtio_qp::virtio_qp(virtio_device* dev, size_t rx_ring_size, size_t tx_ring_siz , _rxq(*this, rxq_config(rx_ring_size), poll_mode) { } -size_t virtio_qp::vring_storage_size(size_t ring_size) { +size_t qp::vring_storage_size(size_t ring_size) { // overestimate, but not by much. return 3 * 4096 + ring_size * (16 + 2 + 8); } -void virtio_qp::common_config(vring::config& r) { +void qp::common_config(vring::config& r) { r.avail = r.descs + 16 * r.size; r.used = align_up(r.avail + 2 * r.size + 6, 4096); r.event_index = (_dev->features() & VIRTIO_RING_F_EVENT_IDX) != 0; r.indirect = false; } -vring::config virtio_qp::txq_config(size_t tx_ring_size) { +vring::config qp::txq_config(size_t tx_ring_size) { vring::config r; r.size = tx_ring_size; r.descs = _txq_storage.get(); @@ -767,7 +769,7 @@ vring::config virtio_qp::txq_config(size_t tx_ring_size) { return r; } -vring::config virtio_qp::rxq_config(size_t rx_ring_size) { +vring::config qp::rxq_config(size_t rx_ring_size) { vring::config r; r.size = rx_ring_size; r.descs = _rxq_storage.get(); @@ -777,15 +779,17 @@ vring::config virtio_qp::rxq_config(size_t rx_ring_size) { } void -virtio_qp::rx_start() { +qp::rx_start() { _rxq.run(); } future<> -virtio_qp::send(packet p) { +qp::send(packet p) { return _txq.post(std::move(p)); } +} + boost::program_options::options_description get_virtio_net_options_description() { @@ -814,13 +818,15 @@ get_virtio_net_options_description() return opts; } -class virtio_qp_vhost : public virtio_qp { +namespace virtio { + +class qp_vhost : public qp { private: // The vhost file descriptor needs to remain open throughout the life of // this driver, as as soon as we close it, vhost stops servicing us. file_desc _vhost_fd; public: - virtio_qp_vhost(virtio_device* dev, boost::program_options::variables_map opts); + qp_vhost(device* dev, boost::program_options::variables_map opts); }; static size_t config_ring_size(boost::program_options::variables_map &opts) { @@ -831,8 +837,8 @@ static size_t config_ring_size(boost::program_options::variables_map &opts) { } } -virtio_qp_vhost::virtio_qp_vhost(virtio_device *dev, boost::program_options::variables_map opts) - : virtio_qp(dev, config_ring_size(opts), config_ring_size(opts), opts["virtio-poll-mode"].as()) +qp_vhost::qp_vhost(device *dev, boost::program_options::variables_map opts) + : qp(dev, config_ring_size(opts), config_ring_size(opts), opts["virtio-poll-mode"].as()) , _vhost_fd(file_desc::open("/dev/vhost-net", O_RDWR)) { auto tap_device = opts["tap-device"].as(); @@ -904,9 +910,9 @@ virtio_qp_vhost::virtio_qp_vhost(virtio_device *dev, boost::program_options::var _vhost_fd.ioctl(VHOST_SET_VRING_CALL, vhost_vring_file{0, _rxq_notify.get_write_fd()}); _vhost_fd.ioctl(VHOST_SET_VRING_KICK, vhost_vring_file{1, _txq_kick.get_read_fd()}); _vhost_fd.ioctl(VHOST_SET_VRING_CALL, vhost_vring_file{1, _txq_notify.get_write_fd()}); - _rxq.set_notifier(std::make_unique( + _rxq.set_notifier(std::make_unique( std::move(_rxq_notify), std::move(_rxq_kick))); - _txq.set_notifier(std::make_unique( + _txq.set_notifier(std::make_unique( std::move(_txq_notify), std::move(_txq_kick))); _vhost_fd.ioctl(VHOST_NET_SET_BACKEND, vhost_vring_file{0, tap_fd.get()}); @@ -916,12 +922,12 @@ virtio_qp_vhost::virtio_qp_vhost(virtio_device *dev, boost::program_options::var } #ifdef HAVE_OSV -class virtio_qp_osv : public virtio_qp { +class qp_osv : public qp { private: ethernet_address _mac; osv::assigned_virtio &_virtio; public: - virtio_qp_osv(osv::assigned_virtio &virtio, + qp_osv(osv::assigned_virtio &virtio, boost::program_options::variables_map opts); virtual ethernet_address hw_address() override { return _mac; @@ -931,9 +937,9 @@ public: } }; -virtio_qp_osv::virtio_qp_osv(osv::assigned_virtio &virtio, +qp_osv::qp_osv(osv::assigned_virtio &virtio, boost::program_options::variables_map opts) - : virtio_qp(opts, virtio.queue_size(0), virtio.queue_size(1), opts["virtio-poll-mode"].as()) + : qp(opts, virtio.queue_size(0), virtio.queue_size(1), opts["virtio-poll-mode"].as()) , _virtio(virtio) { // Read the host's virtio supported feature bitmask, AND it with the @@ -974,8 +980,8 @@ virtio_qp_osv::virtio_qp_osv(osv::assigned_virtio &virtio, host_config.mac[3], host_config.mac[4], host_config.mac[5] }; // Setup notifiers - _rxq.set_notifier(std::make_unique(_virtio, 0)); - _txq.set_notifier(std::make_unique(_virtio, 1)); + _rxq.set_notifier(std::make_unique(_virtio, 0)); + _txq.set_notifier(std::make_unique(_virtio, 1)); // Tell the host where we put the rings (we already allocated them earlier) @@ -999,17 +1005,17 @@ virtio_qp_osv::virtio_qp_osv(osv::assigned_virtio &virtio, } #endif -void virtio_device::init_local_queue(boost::program_options::variables_map opts) { - std::unique_ptr ptr; +void device::init_local_queue(boost::program_options::variables_map opts) { + std::unique_ptr ptr; if (engine.cpu_id() == 0) { #ifdef HAVE_OSV if (osv::assigned_virtio::get && osv::assigned_virtio::get()) { std::cout << "In OSv and assigned host's virtio device\n"; - ptr = std::make_unique(*osv::assigned_virtio::get(), opts); + ptr = std::make_unique(*osv::assigned_virtio::get(), opts); } else #endif - ptr = std::make_unique(this, opts); + ptr = std::make_unique(this, opts); for (unsigned i = 0; i < smp::count; i++) { if (i != engine.cpu_id()) { @@ -1022,10 +1028,12 @@ void virtio_device::init_local_queue(boost::program_options::variables_map opts) set_local_queue(std::move(ptr)); } +} + std::unique_ptr create_virtio_net_device(boost::program_options::variables_map opts) { if (engine.cpu_id() == 0) { - return std::make_unique(opts); + return std::make_unique(opts); } else { return nullptr; } From 5c4ae7a7269aa8b6165ef88052c8002235871428 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 14 Dec 2014 10:42:06 +0200 Subject: [PATCH 2/8] virtio: minor code movement --- net/virtio.cc | 60 ++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/net/virtio.cc b/net/virtio.cc index 9cd35d33c7..2ae6678968 100644 --- a/net/virtio.cc +++ b/net/virtio.cc @@ -788,38 +788,6 @@ qp::send(packet p) { return _txq.post(std::move(p)); } -} - -boost::program_options::options_description -get_virtio_net_options_description() -{ - boost::program_options::options_description opts( - "Virtio net options"); - opts.add_options() - ("event-index", - boost::program_options::value()->default_value("on"), - "Enable event-index feature (on / off)") - ("csum-offload", - boost::program_options::value()->default_value("on"), - "Enable checksum offload feature (on / off)") - ("tso", - boost::program_options::value()->default_value("on"), - "Enable TCP segment offload feature (on / off)") - ("ufo", - boost::program_options::value()->default_value("on"), - "Enable UDP fragmentation offload feature (on / off)") - ("virtio-ring-size", - boost::program_options::value()->default_value(256), - "Virtio ring size (must be power-of-two)") - ("virtio-poll-mode", - boost::program_options::value()->default_value(false), - "Poll virtio rings instead of using interrupts") - ; - return opts; -} - -namespace virtio { - class qp_vhost : public qp { private: // The vhost file descriptor needs to remain open throughout the life of @@ -1030,6 +998,34 @@ void device::init_local_queue(boost::program_options::variables_map opts) { } +boost::program_options::options_description +get_virtio_net_options_description() +{ + boost::program_options::options_description opts( + "Virtio net options"); + opts.add_options() + ("event-index", + boost::program_options::value()->default_value("on"), + "Enable event-index feature (on / off)") + ("csum-offload", + boost::program_options::value()->default_value("on"), + "Enable checksum offload feature (on / off)") + ("tso", + boost::program_options::value()->default_value("on"), + "Enable TCP segment offload feature (on / off)") + ("ufo", + boost::program_options::value()->default_value("on"), + "Enable UDP fragmentation offload feature (on / off)") + ("virtio-ring-size", + boost::program_options::value()->default_value(256), + "Virtio ring size (must be power-of-two)") + ("virtio-poll-mode", + boost::program_options::value()->default_value(false), + "Poll virtio rings instead of using interrupts") + ; + return opts; +} + std::unique_ptr create_virtio_net_device(boost::program_options::variables_map opts) { if (engine.cpu_id() == 0) { From fcbcc19231b65794267bdb72c1e0445516bed1b6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 14 Dec 2014 10:44:10 +0200 Subject: [PATCH 3/8] virtio: remove buffer_chain class It's a concept that is instantiated by its users, not a true class. --- net/virtio.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/virtio.cc b/net/virtio.cc index 2ae6678968..1feaa2994c 100644 --- a/net/virtio.cc +++ b/net/virtio.cc @@ -160,6 +160,12 @@ public: using phys = uint64_t; +// The 'buffer_chain' concept, used in vring, is a container of vring::buffer +// with an added 'promise<> completed' member, as in: +// +// struct buffer_chain : std::vector { +// promise completed; +// }; class vring { public: struct config { @@ -176,9 +182,6 @@ public: uint32_t len; bool writeable; }; - struct buffer_chain : std::vector { - promise completed; - }; private: class desc { public: From f3d2908757d5e2d3c5006a1d76da58fb283369d1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 14 Dec 2014 10:49:42 +0200 Subject: [PATCH 4/8] virtio: move buffer and config out of vring class Prior to templating it, best to get the common elements out. --- net/virtio.cc | 85 ++++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/net/virtio.cc b/net/virtio.cc index 1feaa2994c..fc8a92e01e 100644 --- a/net/virtio.cc +++ b/net/virtio.cc @@ -160,28 +160,29 @@ public: using phys = uint64_t; -// The 'buffer_chain' concept, used in vring, is a container of vring::buffer +struct ring_config { + char* descs; + char* avail; + char* used; + unsigned size; + bool event_index; + bool indirect; + bool mergable_buffers; +}; + +struct buffer { + phys addr; + uint32_t len; + bool writeable; +}; + +// The 'buffer_chain' concept, used in vring, is a container of buffer // with an added 'promise<> completed' member, as in: // // struct buffer_chain : std::vector { // promise completed; // }; class vring { -public: - struct config { - char* descs; - char* avail; - char* used; - unsigned size; - bool event_index; - bool indirect; - bool mergable_buffers; - }; - struct buffer { - phys addr; - uint32_t len; - bool writeable; - }; private: class desc { public: @@ -253,18 +254,18 @@ private: }; struct avail { - explicit avail(config conf); + explicit avail(ring_config conf); avail_layout* _shared; uint16_t _head = 0; uint16_t _avail_added_since_kick = 0; }; struct used { - explicit used(config conf); + explicit used(ring_config conf); used_layout* _shared; uint16_t _tail = 0; }; private: - config _config; + ring_config _config; std::unique_ptr _notifier; std::unique_ptr[]> _completions; desc* _descs; @@ -280,11 +281,11 @@ private: bool _poll_mode = false; public: - explicit vring(config conf, bool poll_mode); + explicit vring(ring_config conf, bool poll_mode); void set_notifier(std::unique_ptr notifier) { _notifier = std::move(notifier); } - const config& getconfig() { + const ring_config& getconfig() { return _config; } void wake_notifier_wait() { @@ -373,11 +374,11 @@ private: void setup(); }; -vring::avail::avail(config conf) +vring::avail::avail(ring_config conf) : _shared(reinterpret_cast(conf.avail)) { } -vring::used::used(config conf) +vring::used::used(ring_config conf) : _shared(reinterpret_cast(conf.used)) { } @@ -393,7 +394,7 @@ unsigned vring::allocate_desc() { return desc; } -vring::vring(config conf, bool poll_mode) +vring::vring(ring_config conf, bool poll_mode) : _config(conf) , _completions(new promise[_config.size]) , _descs(reinterpret_cast(conf.descs)) @@ -533,11 +534,11 @@ protected: qp& _dev; vring _ring; public: - txq(qp& dev, vring::config config, bool poll_mode); + txq(qp& dev, ring_config config, bool poll_mode); void set_notifier(std::unique_ptr notifier) { _ring.set_notifier(std::move(notifier)); } - const vring::config& getconfig() { + const ring_config& getconfig() { return _ring.getconfig(); } void wake_notifier_wait() { @@ -553,11 +554,11 @@ protected: std::vector _fragments; std::vector> _deleters; public: - rxq(qp& _if, vring::config config, bool poll_mode); + rxq(qp& _if, ring_config config, bool poll_mode); void set_notifier(std::unique_ptr notifier) { _ring.set_notifier(std::move(notifier)); } - const vring::config& getconfig() { + const ring_config& getconfig() { return _ring.getconfig(); } void run() { @@ -578,9 +579,9 @@ protected: txq _txq; rxq _rxq; protected: - vring::config txq_config(size_t txq_ring_size); - vring::config rxq_config(size_t rxq_ring_size); - void common_config(vring::config& r); + ring_config txq_config(size_t txq_ring_size); + ring_config rxq_config(size_t rxq_ring_size); + void common_config(ring_config& r); size_t vring_storage_size(size_t ring_size); public: explicit qp(device* dev, size_t rx_ring_size, size_t tx_ring_size, bool poll_mode); @@ -591,7 +592,7 @@ public: } }; -qp::txq::txq(qp& dev, vring::config config, bool poll_mode) +qp::txq::txq(qp& dev, ring_config config, bool poll_mode) : _dev(dev), _ring(config, poll_mode) { } @@ -644,7 +645,7 @@ qp::txq::post(packet p) { auto nr_frags = q.nr_frags(); return _ring.available_descriptors().wait(nr_frags).then([this, nr_frags, p = std::move(q)] () mutable { static auto fragment_to_buffer = [this] (fragment f) { - vring::buffer b; + buffer b; b.addr = _dev.virt_to_phys(f.base); b.len = f.size; b.writeable = false; @@ -669,7 +670,7 @@ qp::txq::post(packet p) { }); } -qp::rxq::rxq(qp& dev, vring::config config, bool poll_mode) +qp::rxq::rxq(qp& dev, ring_config config, bool poll_mode) : _dev(dev), _ring(config, poll_mode) { } @@ -683,11 +684,11 @@ qp::rxq::prepare_buffers() { count += opportunistic; } auto make_buffer_chain = [this] { - struct single_buffer_and_completion : std::array { + struct single_buffer_and_completion : std::array { promise completed; } bc; std::unique_ptr buf(reinterpret_cast(malloc(4096))); - vring::buffer& b = bc[0]; + buffer& b = bc[0]; b.addr = _dev.virt_to_phys(buf.get()); b.len = 4096; b.writeable = true; @@ -756,15 +757,15 @@ size_t qp::vring_storage_size(size_t ring_size) { return 3 * 4096 + ring_size * (16 + 2 + 8); } -void qp::common_config(vring::config& r) { +void qp::common_config(ring_config& r) { r.avail = r.descs + 16 * r.size; r.used = align_up(r.avail + 2 * r.size + 6, 4096); r.event_index = (_dev->features() & VIRTIO_RING_F_EVENT_IDX) != 0; r.indirect = false; } -vring::config qp::txq_config(size_t tx_ring_size) { - vring::config r; +ring_config qp::txq_config(size_t tx_ring_size) { + ring_config r; r.size = tx_ring_size; r.descs = _txq_storage.get(); r.mergable_buffers = false; @@ -772,8 +773,8 @@ vring::config qp::txq_config(size_t tx_ring_size) { return r; } -vring::config qp::rxq_config(size_t rx_ring_size) { - vring::config r; +ring_config qp::rxq_config(size_t rx_ring_size) { + ring_config r; r.size = rx_ring_size; r.descs = _rxq_storage.get(); r.mergable_buffers = true; @@ -936,7 +937,7 @@ qp_osv::qp_osv(osv::assigned_virtio &virtio, // Get the MAC address set by the host assert(subset & VIRTIO_NET_F_MAC); struct net_config { - /* The config defining mac address (if VIRTIO_NET_F_MAC) */ + /* The ring_config defining mac address (if VIRTIO_NET_F_MAC) */ uint8_t mac[6]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* */ uint16_t status; From a86faf020947033933e7e17781b198e81384748b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 14 Dec 2014 11:55:11 +0200 Subject: [PATCH 5/8] virtio: de-virtualize virt_to_phys It is not a device property, but a system property. --- net/virtio.cc | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/net/virtio.cc b/net/virtio.cc index fc8a92e01e..e12349d61d 100644 --- a/net/virtio.cc +++ b/net/virtio.cc @@ -32,6 +32,22 @@ using namespace net; namespace virtio { +using phys = uint64_t; + +#ifndef HAVE_OSV + +phys virt_to_phys(void* p) { + return reinterpret_cast(p); +} + +#else + +phys virt_to_phys(void* p) { + return osv::assigned_virtio::virt_to_phys(p); +} + +#endif + class device : public net::device { private: boost::program_options::variables_map _opts; @@ -158,8 +174,6 @@ public: }; #endif -using phys = uint64_t; - struct ring_config { char* descs; char* avail; @@ -587,9 +601,6 @@ public: explicit qp(device* dev, size_t rx_ring_size, size_t tx_ring_size, bool poll_mode); virtual future<> send(packet p) override; virtual void rx_start() override; - virtual phys virt_to_phys(void* p) { - return reinterpret_cast(p); - } }; qp::txq::txq(qp& dev, ring_config config, bool poll_mode) @@ -646,7 +657,7 @@ qp::txq::post(packet p) { return _ring.available_descriptors().wait(nr_frags).then([this, nr_frags, p = std::move(q)] () mutable { static auto fragment_to_buffer = [this] (fragment f) { buffer b; - b.addr = _dev.virt_to_phys(f.base); + b.addr = virt_to_phys(f.base); b.len = f.size; b.writeable = false; return b; @@ -689,7 +700,7 @@ qp::rxq::prepare_buffers() { } bc; std::unique_ptr buf(reinterpret_cast(malloc(4096))); buffer& b = bc[0]; - b.addr = _dev.virt_to_phys(buf.get()); + b.addr = virt_to_phys(buf.get()); b.len = 4096; b.writeable = true; bc.completed.get_future().then([this, buf = std::move(buf)] (size_t len) mutable { @@ -904,9 +915,6 @@ public: virtual ethernet_address hw_address() override { return _mac; } - virtual phys virt_to_phys(void* p) override { - return osv::assigned_virtio::virt_to_phys(p); - } }; qp_osv::qp_osv(osv::assigned_virtio &virtio, From c7c0aebf07bffa5a4772f1b51cf750d86c243ffe Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 14 Dec 2014 12:24:58 +0200 Subject: [PATCH 6/8] virtio: abstract vring request completions Currently vring request completions are handled by fulfilling a promise contained in the request. While promises are very flexible, this comes at a cost (allocating and executing a task), and this flexibility is unneeded when request handling is very regular (such as in virtio-net rx and tx completion handling). Make vring more flexible by allowing the completion function to be specified as a template parameter. No changes to the actual users - they now specify the completion function as fulfilling the same promise as vring previously did. --- net/virtio.cc | 132 +++++++++++++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 54 deletions(-) diff --git a/net/virtio.cc b/net/virtio.cc index e12349d61d..a74b62a424 100644 --- a/net/virtio.cc +++ b/net/virtio.cc @@ -190,12 +190,15 @@ struct buffer { bool writeable; }; -// The 'buffer_chain' concept, used in vring, is a container of buffer -// with an added 'promise<> completed' member, as in: +// The 'buffer_chain' concept, used in vring, is a container of buffers, as in: // -// struct buffer_chain : std::vector { -// promise completed; -// }; +// using buffer_chain = std::vector; +// +// The 'Completion' concept is a functor with the signature: +// +// void (buffer_chain&, size_t len); +// +template class vring { private: class desc { @@ -280,8 +283,9 @@ private: }; private: ring_config _config; + Completion _complete; std::unique_ptr _notifier; - std::unique_ptr[]> _completions; + std::unique_ptr _buffer_chains; desc* _descs; avail _avail; used _used; @@ -295,7 +299,7 @@ private: bool _poll_mode = false; public: - explicit vring(ring_config conf, bool poll_mode); + explicit vring(ring_config conf, Completion complete, bool poll_mode); void set_notifier(std::unique_ptr notifier) { _notifier = std::move(notifier); } @@ -388,16 +392,20 @@ private: void setup(); }; -vring::avail::avail(ring_config conf) +template +vring::avail::avail(ring_config conf) : _shared(reinterpret_cast(conf.avail)) { } -vring::used::used(ring_config conf) +template +vring::used::used(ring_config conf) : _shared(reinterpret_cast(conf.used)) { } +template inline -unsigned vring::allocate_desc() { +unsigned +vring::allocate_desc() { assert(_free_head != -1); auto desc = _free_head; if (desc == _free_last) { @@ -408,9 +416,11 @@ unsigned vring::allocate_desc() { return desc; } -vring::vring(ring_config conf, bool poll_mode) +template +vring::vring(ring_config conf, Completion complete, bool poll_mode) : _config(conf) - , _completions(new promise[_config.size]) + , _complete(complete) + , _buffer_chains(new BufferChain[_config.size]) , _descs(reinterpret_cast(conf.descs)) , _avail(conf) , _used(conf) @@ -421,7 +431,8 @@ vring::vring(ring_config conf, bool poll_mode) setup(); } -void vring::setup() { +template +void vring::setup() { for (unsigned i = 0; i < _config.size; ++i) { _descs[i]._next = i + 1; } @@ -430,7 +441,8 @@ void vring::setup() { _available_descriptors.signal(_config.size); } -void vring::run() { +template +void vring::run() { if (!_poll_mode) { complete(); } else { @@ -442,7 +454,8 @@ void vring::run() { } } -void vring::flush_batch() { +template +void vring::flush_batch() { if (_batch.empty()) { return; } @@ -454,15 +467,12 @@ void vring::flush_batch() { kick(); } +// Iterator: points at a buffer_chain +template template -void vring::post(Iterator begin, Iterator end) { - // Note: buffer_chain here is any container of buffer, not - // necessarily vector. - // buffer_chain should also include a promise completed - // member, signifying the action to take when the request - // completes. - using buffer_chain = decltype(*begin); - std::for_each(begin, end, [this] (buffer_chain bc) { +void vring::post(Iterator begin, Iterator end) { + for (auto bci = begin; bci!= end; ++bci) { + auto&& bc = *bci; desc pseudo_head = {}; desc* prev = &pseudo_head; for (auto i = bc.begin(); i != bc.end(); ++i) { @@ -471,21 +481,21 @@ void vring::post(Iterator begin, Iterator end) { prev->_next = desc_idx; desc &d = _descs[desc_idx]; d._flags = {}; - auto b = *i; + auto&& b = *i; d._flags.writeable = b.writeable; d._paddr = b.addr; d._len = b.len; prev = &d; } auto desc_head = pseudo_head._next; - _completions[desc_head] = std::move(bc.completed); + _buffer_chains[desc_head] = std::move(bc); if (!_poll_mode) { _avail._shared->_ring[masked(_avail._head++)] = desc_head; } else { _batch.push_back(desc_head); } _avail._avail_added_since_kick++; - }); + } if (!_poll_mode) { _avail._shared->_idx.store(_avail._head, std::memory_order_release); kick(); @@ -495,13 +505,14 @@ void vring::post(Iterator begin, Iterator end) { } } -void vring::do_complete() { +template +void vring::do_complete() { do { disable_interrupts(); auto used_head = _used._shared->_idx.load(std::memory_order_acquire); while (used_head != _used._tail) { auto ue = _used._shared->_used_elements[masked(_used._tail++)]; - _completions[ue._id].set_value(ue._len); + _complete(std::move(_buffer_chains[ue._id]), ue._len); auto id = ue._id; if (_free_last != -1) { _descs[_free_last]._next = id; @@ -522,7 +533,8 @@ void vring::do_complete() { } while (enable_interrupts()); } -void vring::complete() { +template +void vring::complete() { do_complete(); _notifier->wait().then([this] { complete(); @@ -545,8 +557,31 @@ protected: uint16_t num_buffers; }; class txq { + static buffer fragment_to_buffer(fragment f) { + buffer b; + b.addr = virt_to_phys(f.base); + b.len = f.size; + b.writeable = false; + return b; + }; + struct packet_as_buffer_chain { + fragment* start; + fragment* finish; + promise completed; + auto begin() { + return make_transform_iterator(start, fragment_to_buffer); + } + auto end() { + return make_transform_iterator(finish, fragment_to_buffer); + } + }; + struct complete { + void operator()(packet_as_buffer_chain&& bc, size_t len) { + bc.completed.set_value(len); + } + }; qp& _dev; - vring _ring; + vring _ring; public: txq(qp& dev, ring_config config, bool poll_mode); void set_notifier(std::unique_ptr notifier) { @@ -562,8 +597,16 @@ protected: future<> post(packet p); }; class rxq { + struct single_buffer_and_completion : std::array { + promise completed; + }; + struct complete { + void operator()(single_buffer_and_completion&& bc, size_t len) { + bc.completed.set_value(len); + } + }; qp& _dev; - vring _ring; + vring _ring; unsigned _remaining_buffers = 0; std::vector _fragments; std::vector> _deleters; @@ -604,7 +647,7 @@ public: }; qp::txq::txq(qp& dev, ring_config config, bool poll_mode) - : _dev(dev), _ring(config, poll_mode) { + : _dev(dev), _ring(config, complete(), poll_mode) { } future<> @@ -655,24 +698,7 @@ qp::txq::post(packet p) { auto nr_frags = q.nr_frags(); return _ring.available_descriptors().wait(nr_frags).then([this, nr_frags, p = std::move(q)] () mutable { - static auto fragment_to_buffer = [this] (fragment f) { - buffer b; - b.addr = virt_to_phys(f.base); - b.len = f.size; - b.writeable = false; - return b; - }; - struct packet_as_buffer_chain { - fragment* start; - fragment* finish; - promise completed; - auto begin() { - return make_transform_iterator(start, fragment_to_buffer); - } - auto end() { - return make_transform_iterator(finish, fragment_to_buffer); - } - } vbc[1] { { p.fragments().begin(), p.fragments().end() } }; + packet_as_buffer_chain vbc[1] { { p.fragments().begin(), p.fragments().end() } }; // schedule packet destruction vbc[0].completed.get_future().then([this, nr_frags, p = std::move(p)] (size_t) { _ring.available_descriptors().signal(nr_frags); @@ -682,7 +708,7 @@ qp::txq::post(packet p) { } qp::rxq::rxq(qp& dev, ring_config config, bool poll_mode) - : _dev(dev), _ring(config, poll_mode) { + : _dev(dev), _ring(config, complete(), poll_mode) { } future<> @@ -695,9 +721,7 @@ qp::rxq::prepare_buffers() { count += opportunistic; } auto make_buffer_chain = [this] { - struct single_buffer_and_completion : std::array { - promise completed; - } bc; + single_buffer_and_completion bc; std::unique_ptr buf(reinterpret_cast(malloc(4096))); buffer& b = bc[0]; b.addr = virt_to_phys(buf.get()); From 1ee959d3e27e959498a4bb88d80ef59768050a78 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 14 Dec 2014 12:34:04 +0200 Subject: [PATCH 7/8] virtio: de-futurize transmit Move completion handling (destroy packet, adjust descriptors count) to a completion function rather than a future. Reduces allocations and task executed. --- net/virtio.cc | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/net/virtio.cc b/net/virtio.cc index a74b62a424..c28f81ea79 100644 --- a/net/virtio.cc +++ b/net/virtio.cc @@ -565,19 +565,20 @@ protected: return b; }; struct packet_as_buffer_chain { - fragment* start; - fragment* finish; - promise completed; + packet p; auto begin() { - return make_transform_iterator(start, fragment_to_buffer); + return make_transform_iterator(p.fragments().begin(), fragment_to_buffer); } auto end() { - return make_transform_iterator(finish, fragment_to_buffer); + return make_transform_iterator(p.fragments().end(), fragment_to_buffer); } }; struct complete { + txq& q; void operator()(packet_as_buffer_chain&& bc, size_t len) { - bc.completed.set_value(len); + // move the packet here, to be destroyed on scope exit + auto p = std::move(bc.p); + q._ring.available_descriptors().signal(p.nr_frags()); } }; qp& _dev; @@ -647,7 +648,7 @@ public: }; qp::txq::txq(qp& dev, ring_config config, bool poll_mode) - : _dev(dev), _ring(config, complete(), poll_mode) { + : _dev(dev), _ring(config, complete{*this}, poll_mode) { } future<> @@ -698,11 +699,7 @@ qp::txq::post(packet p) { auto nr_frags = q.nr_frags(); return _ring.available_descriptors().wait(nr_frags).then([this, nr_frags, p = std::move(q)] () mutable { - packet_as_buffer_chain vbc[1] { { p.fragments().begin(), p.fragments().end() } }; - // schedule packet destruction - vbc[0].completed.get_future().then([this, nr_frags, p = std::move(p)] (size_t) { - _ring.available_descriptors().signal(nr_frags); - }); + packet_as_buffer_chain vbc[1] { { std::move(p) } }; _ring.post(std::begin(vbc), std::end(vbc)); }); } From 508322c7dac9ff8df56c4dcdc2f5729997e7110c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 14 Dec 2014 12:34:04 +0200 Subject: [PATCH 8/8] virtio: de-futurize receive Move completion handling (destroy packet, adjust descriptors count) to a completion function rather than a future. Reduces allocations and task executed. --- net/virtio.cc | 92 ++++++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/net/virtio.cc b/net/virtio.cc index c28f81ea79..5f3db99ee3 100644 --- a/net/virtio.cc +++ b/net/virtio.cc @@ -598,16 +598,18 @@ protected: future<> post(packet p); }; class rxq { - struct single_buffer_and_completion : std::array { - promise completed; + struct buffer_and_virt : buffer { + std::unique_ptr buf; }; + using single_buffer = std::array; struct complete { - void operator()(single_buffer_and_completion&& bc, size_t len) { - bc.completed.set_value(len); + rxq& q; + void operator()(single_buffer&& bc, size_t len) { + q.complete_buffer(std::move(bc), len); } }; qp& _dev; - vring _ring; + vring _ring; unsigned _remaining_buffers = 0; std::vector _fragments; std::vector> _deleters; @@ -628,6 +630,7 @@ protected: } private: future<> prepare_buffers(); + void complete_buffer(single_buffer&& b, size_t len); }; protected: device* _dev; @@ -705,7 +708,7 @@ qp::txq::post(packet p) { } qp::rxq::rxq(qp& dev, ring_config config, bool poll_mode) - : _dev(dev), _ring(config, complete(), poll_mode) { + : _dev(dev), _ring(config, complete{*this}, poll_mode) { } future<> @@ -718,46 +721,13 @@ qp::rxq::prepare_buffers() { count += opportunistic; } auto make_buffer_chain = [this] { - single_buffer_and_completion bc; + single_buffer bc; std::unique_ptr buf(reinterpret_cast(malloc(4096))); - buffer& b = bc[0]; + buffer_and_virt& b = bc[0]; b.addr = virt_to_phys(buf.get()); b.len = 4096; b.writeable = true; - bc.completed.get_future().then([this, buf = std::move(buf)] (size_t len) mutable { - auto frag_buf = buf.get(); - auto frag_len = len; - // First buffer - if (_remaining_buffers == 0) { - auto hdr = reinterpret_cast(frag_buf); - assert(hdr->num_buffers >= 1); - // TODO: special-case for num_buffers == 1 - _remaining_buffers = hdr->num_buffers; - frag_buf += _dev._header_len; - frag_len -= _dev._header_len; - _fragments.clear(); - _deleters.clear(); - }; - - // Append current buffer - _fragments.emplace_back(fragment{frag_buf, frag_len}); - _deleters.push_back(std::move(buf)); - _remaining_buffers--; - - // Last buffer - if (_remaining_buffers == 0) { - deleter del; - if (_deleters.size() == 1) { - del = make_free_deleter(_deleters[0].release()); - _deleters.clear(); - } else { - del = make_deleter(deleter(), [deleters = std::move(_deleters)] {}); - } - packet p(_fragments.begin(), _fragments.end(), std::move(del)); - _dev._dev->l2receive(std::move(p)); - _ring.available_descriptors().signal(_fragments.size()); - } - }); + b.buf = std::move(buf); return bc; }; auto start = make_function_input_iterator(make_buffer_chain, 0U); @@ -766,6 +736,44 @@ qp::rxq::prepare_buffers() { }); } +void +qp::rxq::complete_buffer(single_buffer&& bc, size_t len) { + auto&& sb = bc[0]; + auto&& buf = sb.buf; + auto frag_buf = buf.get(); + auto frag_len = len; + // First buffer + if (_remaining_buffers == 0) { + auto hdr = reinterpret_cast(frag_buf); + assert(hdr->num_buffers >= 1); + // TODO: special-case for num_buffers == 1 + _remaining_buffers = hdr->num_buffers; + frag_buf += _dev._header_len; + frag_len -= _dev._header_len; + _fragments.clear(); + _deleters.clear(); + }; + + // Append current buffer + _fragments.emplace_back(fragment{frag_buf, frag_len}); + _deleters.push_back(std::move(buf)); + _remaining_buffers--; + + // Last buffer + if (_remaining_buffers == 0) { + deleter del; + if (_deleters.size() == 1) { + del = make_free_deleter(_deleters[0].release()); + _deleters.clear(); + } else { + del = make_deleter(deleter(), [deleters = std::move(_deleters)] {}); + } + packet p(_fragments.begin(), _fragments.end(), std::move(del)); + _dev._dev->l2receive(std::move(p)); + _ring.available_descriptors().signal(_fragments.size()); + } +} + // Allocate and zero-initialize a buffer which is page-aligned and can be // used for virt_to_phys (i.e., physically contiguous). static std::unique_ptr virtio_buffer(size_t size) {