mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-23 18:10:39 +00:00
net: Support TCP checksum offload
This gives a ~5% httpd performance improvement on monster. A csum-offload option is added; for example, to disable offload: ./httpd --network-stack native --csum-offload off
This commit is contained in:
@@ -90,6 +90,11 @@ public:
|
||||
throw_system_error_on(r == -1);
|
||||
return r;
|
||||
}
|
||||
// Issues ::ioctl on _fd with an integer argument passed by value (as opposed
// to the templated overload, which passes the address of its argument).
// The failure condition (r == -1) is handed to throw_system_error_on();
// otherwise the raw ioctl result is returned.
int ioctl(int request, unsigned int value) {
|
||||
int r = ::ioctl(_fd, request, value);
|
||||
throw_system_error_on(r == -1);
|
||||
return r;
|
||||
}
|
||||
template <class X>
|
||||
int ioctl(int request, X& data) {
|
||||
int r = ::ioctl(_fd, request, &data);
|
||||
|
||||
13
net/ip.cc
13
net/ip.cc
@@ -18,7 +18,8 @@ std::ostream& operator<<(std::ostream& os, ipv4_address a) {
|
||||
}
|
||||
|
||||
ipv4::ipv4(interface* netif)
|
||||
: _global_arp(netif)
|
||||
: _netif(netif)
|
||||
, _global_arp(netif)
|
||||
, _arp(_global_arp)
|
||||
, _l3(netif, 0x0800)
|
||||
, _rx_packets(_l3.receive([this] (packet p, ethernet_address ea) {
|
||||
@@ -38,10 +39,12 @@ ipv4::handle_received_packet(packet p, ethernet_address from) {
|
||||
if (!iph) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
checksummer csum;
|
||||
csum.sum(reinterpret_cast<char*>(iph), sizeof(*iph));
|
||||
if (csum.get() != 0) {
|
||||
return make_ready_future<>();
|
||||
if (!hw_features().rx_csum_offload) {
|
||||
checksummer csum;
|
||||
csum.sum(reinterpret_cast<char*>(iph), sizeof(*iph));
|
||||
if (csum.get() != 0) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}
|
||||
ntoh(*iph);
|
||||
// FIXME: process options
|
||||
|
||||
@@ -70,6 +70,7 @@ struct ipv4_traits {
|
||||
|
||||
template <uint8_t ProtoNum>
|
||||
class ipv4_l4 {
|
||||
public:
|
||||
ipv4& _inet;
|
||||
public:
|
||||
ipv4_l4(ipv4& inet) : _inet(inet) {}
|
||||
@@ -100,6 +101,7 @@ public:
|
||||
static address_type broadcast_address() { return ipv4_address(0xffffffff); }
|
||||
static proto_type arp_protocol_type() { return 0x0800; }
|
||||
private:
|
||||
interface* _netif;
|
||||
arp _global_arp;
|
||||
arp_for<ipv4> _arp;
|
||||
ipv4_address _host_address;
|
||||
@@ -118,6 +120,7 @@ public:
|
||||
void send(ipv4_address to, uint8_t proto_num, packet p);
|
||||
tcp<ipv4_traits>& get_tcp() { return _tcp._tcp; }
|
||||
void register_l4(proto_type id, ip_protocol* handler);
|
||||
net::hw_features hw_features() { return _netif->hw_features(); }
|
||||
};
|
||||
|
||||
template <uint8_t ProtoNum>
|
||||
|
||||
@@ -26,7 +26,8 @@ future<> l3_protocol::send(ethernet_address to, packet p) {
|
||||
interface::interface(std::unique_ptr<device> dev)
|
||||
: _dev(std::move(dev))
|
||||
, _rx(_dev->receive([this] (packet p) { return dispatch_packet(std::move(p)); }))
|
||||
, _hw_address(_dev->hw_address()) {
|
||||
, _hw_address(_dev->hw_address())
|
||||
, _hw_features(_dev->hw_features()) {
|
||||
}
|
||||
|
||||
subscription<packet, ethernet_address>
|
||||
|
||||
11
net/net.hh
11
net/net.hh
@@ -20,6 +20,13 @@ class interface;
|
||||
class device;
|
||||
class l3_protocol;
|
||||
|
||||
// Checksum-offload flags that a device reports through hw_features().
// The members have no default initializers, so code that creates an instance
// is expected to assign both fields before they are read.
struct hw_features {
|
||||
// Enable tx checksum offload: when true, the TCP output paths store only the
// complemented pseudo-header sum instead of the full checksum.
|
||||
bool tx_csum_offload;
|
||||
// Enable rx checksum offload: when true, the IPv4 and TCP receive paths skip
// their software checksum verification.
|
||||
bool rx_csum_offload;
|
||||
};
|
||||
|
||||
class l3_protocol {
|
||||
interface* _netif;
|
||||
uint16_t _proto_num;
|
||||
@@ -42,12 +49,14 @@ class interface {
|
||||
};
|
||||
std::unordered_map<uint16_t, l3_rx_stream> _proto_map;
|
||||
ethernet_address _hw_address;
|
||||
net::hw_features _hw_features;
|
||||
private:
|
||||
future<> dispatch_packet(packet p);
|
||||
future<> send(uint16_t proto_num, ethernet_address to, packet p);
|
||||
public:
|
||||
explicit interface(std::unique_ptr<device> dev);
|
||||
ethernet_address hw_address() { return _hw_address; }
|
||||
net::hw_features hw_features() { return _hw_features; }
|
||||
subscription<packet, ethernet_address> register_l3(uint16_t proto_num,
|
||||
std::function<future<> (packet p, ethernet_address from)> next);
|
||||
friend class l3_protocol;
|
||||
@@ -59,7 +68,9 @@ public:
|
||||
virtual subscription<packet> receive(std::function<future<> (packet)> next_packet) = 0;
|
||||
virtual future<> send(packet p) = 0;
|
||||
virtual ethernet_address hw_address() = 0;
|
||||
virtual net::hw_features hw_features() = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif /* NET_HH_ */
|
||||
|
||||
32
net/tcp.hh
32
net/tcp.hh
@@ -219,6 +219,7 @@ public:
|
||||
explicit tcp(inet_type& inet) : _inet(inet) {}
|
||||
void received(packet p, ipaddr from, ipaddr to);
|
||||
listener listen(uint16_t port, size_t queue_length = 100);
|
||||
net::hw_features hw_features() { return _inet._inet.hw_features(); }
|
||||
private:
|
||||
void send(ipaddr from, ipaddr to, packet p);
|
||||
void respond_with_reset(tcp_hdr* rth, ipaddr local_ip, ipaddr foreign_ip);
|
||||
@@ -253,11 +254,14 @@ void tcp<InetTraits>::received(packet p, ipaddr from, ipaddr to) {
|
||||
if (unsigned(th->data_offset * 4) < sizeof(*th)) {
|
||||
return;
|
||||
}
|
||||
checksummer csum;
|
||||
InetTraits::pseudo_header_checksum(csum, from, to, p.len());
|
||||
csum.sum(p);
|
||||
if (csum.get() != 0) {
|
||||
return;
|
||||
|
||||
if (!hw_features().rx_csum_offload) {
|
||||
checksummer csum;
|
||||
InetTraits::pseudo_header_checksum(csum, from, to, p.len());
|
||||
csum.sum(p);
|
||||
if (csum.get() != 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
// FIXME: process options
|
||||
p.trim_front(th->data_offset * 4);
|
||||
@@ -332,8 +336,12 @@ void tcp<InetTraits>::respond_with_reset(tcp_hdr* rth, ipaddr local_ip, ipaddr f
|
||||
|
||||
checksummer csum;
|
||||
InetTraits::pseudo_header_checksum(csum, local_ip, foreign_ip, sizeof(*th));
|
||||
csum.sum(p);
|
||||
th->checksum = csum.get();
|
||||
if (hw_features().tx_csum_offload) {
|
||||
th->checksum = ~csum.get();
|
||||
} else {
|
||||
csum.sum(p);
|
||||
th->checksum = csum.get();
|
||||
}
|
||||
|
||||
send(local_ip, foreign_ip, std::move(p));
|
||||
}
|
||||
@@ -536,8 +544,14 @@ void tcp<InetTraits>::tcb::output() {
|
||||
|
||||
checksummer csum;
|
||||
InetTraits::pseudo_header_checksum(csum, _local_ip, _foreign_ip, sizeof(*th) + len);
|
||||
csum.sum(p);
|
||||
th->checksum = csum.get();
|
||||
if (_tcp.hw_features().tx_csum_offload) {
|
||||
// virtio-net's VIRTIO_NET_F_CSUM feature requires th->checksum to be
|
||||
// initialized to ones' complement sum of the pseudo header.
|
||||
th->checksum = ~csum.get();
|
||||
} else {
|
||||
csum.sum(p);
|
||||
th->checksum = csum.get();
|
||||
}
|
||||
|
||||
_tcp.send(_local_ip, _foreign_ip, std::move(p));
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <linux/vhost.h>
|
||||
#include <linux/if.h>
|
||||
#include <linux/if_tun.h>
|
||||
#include "tcp.hh"
|
||||
|
||||
using namespace net;
|
||||
|
||||
@@ -399,12 +400,14 @@ private:
|
||||
std::unique_ptr<char[], free_deleter> _rxq_storage;
|
||||
boost::program_options::variables_map _opts;
|
||||
uint64_t _features;
|
||||
net::hw_features _hw_features;
|
||||
txq _txq;
|
||||
rxq _rxq;
|
||||
stream<packet> _rx_stream;
|
||||
future<> _rx_ready;
|
||||
private:
|
||||
uint64_t setup_features();
|
||||
void setup_tap_device(sstring tap_device);
|
||||
vring::config txq_config();
|
||||
vring::config rxq_config();
|
||||
void common_config(vring::config& r);
|
||||
@@ -414,6 +417,7 @@ public:
|
||||
virtual subscription<packet> receive(std::function<future<> (packet)> next) override;
|
||||
virtual future<> send(packet p) override;
|
||||
virtual ethernet_address hw_address() override;
|
||||
virtual net::hw_features hw_features() override;
|
||||
};
|
||||
|
||||
virtio_net_device::txq::txq(virtio_net_device& dev, vring::config config,
|
||||
@@ -429,6 +433,20 @@ virtio_net_device::txq::transmit(semaphore& available) {
|
||||
_tx_queue.pop();
|
||||
// Linux requires that hdr_len be sane even if gso is disabled.
|
||||
net_hdr_mrg vhdr = {};
|
||||
|
||||
// Handle TCP checksum offload
|
||||
if (_dev.hw_features().tx_csum_offload) {
|
||||
// FIXME: No magic numbers
|
||||
auto hdr = p.get_header<tcp_hdr>(14+ 20);
|
||||
if (hdr) {
|
||||
vhdr.needs_csum = 1;
|
||||
// 14 bytes ethernet header and 20 bytes IP header
|
||||
vhdr.csum_start = 14 + 20;
|
||||
// TCP checksum field's offset within the TCP header is 16 bytes
|
||||
vhdr.csum_offset = 16;
|
||||
}
|
||||
}
|
||||
|
||||
// prepend virtio-net header
|
||||
packet q = packet(fragment{reinterpret_cast<char*>(&vhdr), _dev._header_len},
|
||||
std::move(p));
|
||||
@@ -496,6 +514,21 @@ virtio_net_device::rxq::prepare_buffers(semaphore& available) {
|
||||
});
|
||||
}
|
||||
|
||||
// Configures the tap file descriptor (_tap_fd) for the interface named by
// tap_device: attaches it with TUNSETIFF, then sets its offload flags with
// TUNSETOFFLOAD.
void virtio_net_device::setup_tap_device(sstring tap_device) {
|
||||
// The name plus its terminating NUL must fit in the ifreq name buffer that
// strcpy() writes to below.
assert(tap_device.size() + 1 <= IFNAMSIZ);
|
||||
|
||||
ifreq ifr = {};
|
||||
ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR;
|
||||
strcpy(ifr.ifr_ifrn.ifrn_name, tap_device.c_str());
|
||||
_tap_fd.ioctl(TUNSETIFF, ifr);
|
||||
|
||||
// TUN_F_CSUM is requested only when both tx and rx checksum offload are
// enabled; otherwise the offload mask stays 0.
unsigned int offload = 0;
|
||||
if (hw_features().tx_csum_offload && hw_features().rx_csum_offload) {
|
||||
offload = TUN_F_CSUM;
|
||||
}
|
||||
_tap_fd.ioctl(TUNSETOFFLOAD, offload);
|
||||
}
|
||||
|
||||
virtio_net_device::virtio_net_device(sstring tap_device, boost::program_options::variables_map opts, init x)
|
||||
: _tap_fd(file_desc::open("/dev/net/tun", O_RDWR | O_NONBLOCK))
|
||||
, _vhost_fd(file_desc::open("/dev/vhost-net", O_RDWR))
|
||||
@@ -507,11 +540,7 @@ virtio_net_device::virtio_net_device(sstring tap_device, boost::program_options:
|
||||
, _rxq(*this, rxq_config(), std::move(x._rxq_notify), std::move(x._rxq_kick))
|
||||
, _rx_stream()
|
||||
, _rx_ready(_rx_stream.started()) {
|
||||
assert(tap_device.size() + 1 <= IFNAMSIZ);
|
||||
ifreq ifr = {};
|
||||
ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR;
|
||||
strcpy(ifr.ifr_ifrn.ifrn_name, tap_device.c_str());
|
||||
_tap_fd.ioctl(TUNSETIFF, ifr);
|
||||
setup_tap_device(tap_device);
|
||||
_vhost_fd.ioctl(VHOST_SET_OWNER);
|
||||
auto mem_table = make_struct_with_vla(&vhost_memory::regions, 1);
|
||||
mem_table->nregions = 1;
|
||||
@@ -544,9 +573,18 @@ virtio_net_device::virtio_net_device(sstring tap_device, boost::program_options:
|
||||
|
||||
uint64_t virtio_net_device::setup_features() {
|
||||
int64_t seastar_supported_features = VIRTIO_RING_F_INDIRECT_DESC;
|
||||
|
||||
if (!(_opts.count("event-index") && _opts["event-index"].as<std::string>() == "off")) {
|
||||
seastar_supported_features |= VIRTIO_RING_F_EVENT_IDX;
|
||||
}
|
||||
if (!(_opts.count("csum-offload") && _opts["csum-offload"].as<std::string>() == "off")) {
|
||||
seastar_supported_features |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
|
||||
_hw_features.tx_csum_offload = true;
|
||||
_hw_features.rx_csum_offload = true;
|
||||
} else {
|
||||
_hw_features.tx_csum_offload = false;
|
||||
_hw_features.rx_csum_offload = false;
|
||||
}
|
||||
|
||||
int64_t vhost_supported_features;
|
||||
_vhost_fd.ioctl(VHOST_GET_FEATURES, vhost_supported_features);
|
||||
@@ -601,6 +639,10 @@ ethernet_address virtio_net_device::hw_address() {
|
||||
return {{{ 0x12, 0x23, 0x34, 0x56, 0x67, 0x78 }}};
|
||||
}
|
||||
|
||||
// Returns a copy of the offload flags held in _hw_features, which
// setup_features() assigns according to the "csum-offload" option.
net::hw_features virtio_net_device::hw_features() {
|
||||
return _hw_features;
|
||||
}
|
||||
|
||||
boost::program_options::options_description
|
||||
get_virtio_net_options_description()
|
||||
{
|
||||
@@ -610,6 +652,9 @@ get_virtio_net_options_description()
|
||||
("event-index",
|
||||
boost::program_options::value<std::string>()->default_value("on"),
|
||||
"Enable event-index feature (on / off)")
|
||||
("csum-offload",
|
||||
boost::program_options::value<std::string>()->default_value("on"),
|
||||
"Enable checksum offload feature (on / off)")
|
||||
;
|
||||
return opts;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user