Merge branch 'dpdk_lro-v4' of github.com:cloudius-systems/seastar-dev

This commit is contained in:
Avi Kivity
2015-04-15 12:54:15 +03:00
6 changed files with 166 additions and 44 deletions

View File

@@ -27,12 +27,6 @@ def get_flags():
if line.rstrip('\n').startswith('flags'):
return re.sub(r'^flags\s+: ', '', line).split()
def has_avx():
return 'avx' in get_flags()
def has_avx2():
return 'avx2' in get_flags()
def add_tristate(arg_parser, name, dest, help):
arg_parser.add_argument('--enable-' + name, dest = dest, action = 'store_true', default = None,
help = 'Enable ' + help)
@@ -51,6 +45,35 @@ def apply_tristate(var, test, note, missing):
return False
return False
#
# dpdk_cflags - fetch the DPDK specific CFLAGS
#
# Run a simple makefile that "includes" the DPDK main makefile and prints the
# MACHINE_CFLAGS value
#
def dpdk_cflags (dpdk_target):
with tempfile.NamedTemporaryFile() as sfile:
dpdk_target = os.path.abspath(dpdk_target)
dpdk_target = re.sub(r'\/+$', '', dpdk_target)
dpdk_sdk_path = os.path.dirname(dpdk_target)
dpdk_target_name = os.path.basename(dpdk_target)
dpdk_arch = dpdk_target_name.split('-')[0]
sfile.file.write(bytes('include ' + dpdk_sdk_path + '/mk/rte.vars.mk' + "\n", 'utf-8'))
sfile.file.write(bytes('all:' + "\n\t", 'utf-8'))
sfile.file.write(bytes('@echo $(MACHINE_CFLAGS)' + "\n", 'utf-8'))
sfile.file.flush()
dpdk_cflags = subprocess.check_output(['make', '-f', sfile.name,
'RTE_SDK=' + dpdk_sdk_path,
'RTE_TARGET=' + dpdk_target_name,
'RTE_ARCH=' + dpdk_arch])
dpdk_cflags_str = dpdk_cflags.decode('utf-8')
dpdk_cflags_str = re.sub(r'\n+$', '', dpdk_cflags_str)
dpdk_cflags_final = ''
return dpdk_cflags_str
def try_compile(compiler, source = '', flags = []):
with tempfile.NamedTemporaryFile() as sfile:
sfile.file.write(bytes(source, 'utf-8'))
@@ -292,10 +315,9 @@ if args.with_osv:
if args.dpdk_target:
args.user_cflags = (args.user_cflags +
' -DHAVE_DPDK -I' +
args.dpdk_target + '/include -Wno-error=literal-suffix -Wno-literal-suffix -Wno-invalid-offsetof -m64' +
' -mavx' if has_avx() else '' +
' -mavx2' if has_avx2() else '')
' -DHAVE_DPDK -I' + args.dpdk_target + '/include ' +
dpdk_cflags(args.dpdk_target) +
' -Wno-error=literal-suffix -Wno-literal-suffix -Wno-invalid-offsetof')
libs += (' -L' + args.dpdk_target + '/lib ' +
'-Wl,--whole-archive -lrte_pmd_bond -lrte_pmd_vmxnet3_uio -lrte_pmd_virtio_uio -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_pmd_e1000 -lrte_pmd_ring -Wl,--no-whole-archive -lrte_distributor -lrte_kni -lrte_pipeline -lrte_table -lrte_port -lrte_timer -lrte_hash -lrte_lpm -lrte_power -lrte_acl -lrte_meter -lrte_sched -lrte_kvargs -lrte_mbuf -lrte_ip_frag -lethdev -lrte_eal -lrte_malloc -lrte_mempool -lrte_ring -lrte_cmdline -lrte_cfgfile -lrt -lm -ldl')

View File

@@ -168,6 +168,7 @@ class dpdk_device : public device {
net::hw_features _hw_features;
uint8_t _queues_ready = 0;
unsigned _home_cpu;
bool _use_lro;
std::vector<uint8_t> _redir_table;
#ifdef RTE_VERSION_1_7
struct rte_eth_rxconf _rx_conf_default = {};
@@ -214,10 +215,11 @@ private:
void check_port_link_status();
public:
dpdk_device(uint8_t port_idx, uint16_t num_queues)
dpdk_device(uint8_t port_idx, uint16_t num_queues, bool use_lro)
: _port_idx(port_idx)
, _num_queues(num_queues)
, _home_cpu(engine().cpu_id()) {
, _home_cpu(engine().cpu_id())
, _use_lro(use_lro) {
/* now initialise the port we will use */
int ret = init_port_start();
@@ -235,6 +237,8 @@ public:
return _hw_features;
}
net::hw_features& hw_features_ref() { return _hw_features; }
const rte_eth_rxconf* def_rx_conf() const {
#ifdef RTE_VERSION_1_7
return &_rx_conf_default;
@@ -1043,12 +1047,22 @@ private:
*/
packet from_mbuf(rte_mbuf* m);
/**
* Transform an LRO rte_mbuf cluster into the "packet" object.
* @param m HEAD of the mbufs' cluster to transform
*
* @return a "packet" object representing the newly received LRO packet.
*/
packet from_mbuf_lro(rte_mbuf* m);
private:
dpdk_device* _dev;
uint8_t _qid;
rte_mempool *_pktmbuf_pool_rx;
std::vector<rte_mbuf*> _rx_free_pkts;
std::vector<rte_mbuf*> _rx_free_bufs;
std::vector<fragment> _frags;
std::vector<char*> _bufs;
size_t _num_rx_free_segs = 0;
reactor::poller _rx_gc_poller;
std::unique_ptr<void, free_deleter> _rx_xmem;
@@ -1133,6 +1147,19 @@ int dpdk_device::init_port_start()
port_conf.rxmode.hw_vlan_strip = 1;
}
// Enable HW CRC stripping
port_conf.rxmode.hw_strip_crc = 1;
#ifdef RTE_ETHDEV_HAS_LRO_SUPPORT
// Enable LRO
if (_use_lro && (_dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)) {
printf("LRO is on\n");
port_conf.rxmode.enable_lro = 1;
_hw_features.rx_lro = true;
} else
#endif
printf("LRO is off\n");
// Check that all CSUM features are either all set all together or not set
// all together. If this assumption breaks we need to rework the below logic
// by splitting the csum offload feature bit into separate bits for IPv4,
@@ -1413,47 +1440,112 @@ void dpdk_qp<HugetlbfsMemBackend>::rx_start() {
}
template<>
inline packet dpdk_qp<false>::from_mbuf(rte_mbuf* m)
inline packet dpdk_qp<false>::from_mbuf_lro(rte_mbuf* m)
{
//
// Try to allocate a buffer for packet's data. If we fail - give the
// application an mbuf itself. If we succeed - copy the data into this
// buffer, create a packet based on this buffer and return the mbuf to its
// pool.
// Try to allocate a buffer for the whole packet's data.
// If we fail - construct the packet from mbufs.
// If we succeed - copy the data into this buffer, create a packet based on
// this buffer and return the mbuf to its pool.
//
auto len = rte_pktmbuf_data_len(m);
char* buf = (char*)malloc(len);
auto pkt_len = rte_pktmbuf_pkt_len(m);
char* buf = (char*)malloc(pkt_len);
if (!buf) {
fragment f{rte_pktmbuf_mtod(m, char*), len};
return packet(f, make_deleter(deleter(), [m] { rte_pktmbuf_free(m); }));
_frags.clear();
for (rte_mbuf* m1 = m; m1 != nullptr; m1 = m1->next) {
char* data = rte_pktmbuf_mtod(m1, char*);
_frags.emplace_back(fragment{data, rte_pktmbuf_data_len(m1)});
}
return packet(_frags.begin(), _frags.end(),
make_deleter(deleter(), [m] { rte_pktmbuf_free(m); }));
} else {
rte_memcpy(buf, rte_pktmbuf_mtod(m, char*), len);
// Copy the contents of the packet into the buffer we've just allocated
size_t offset = 0;
for (rte_mbuf* m1 = m; m1 != nullptr; m1 = m1->next) {
char* data = rte_pktmbuf_mtod(m1, char*);
auto len = rte_pktmbuf_data_len(m1);
rte_memcpy(buf + offset, data, len);
offset += len;
}
rte_pktmbuf_free(m);
fragment f{buf, len};
return packet(f, make_free_deleter(buf));
return packet(fragment{buf, pkt_len}, make_free_deleter(buf));
}
}
template<>
inline packet dpdk_qp<false>::from_mbuf(rte_mbuf* m)
{
if (!_dev->hw_features_ref().rx_lro || rte_pktmbuf_is_contiguous(m)) {
//
// Try to allocate a buffer for packet's data. If we fail - give the
// application an mbuf itself. If we succeed - copy the data into this
// buffer, create a packet based on this buffer and return the mbuf to
// its pool.
//
auto len = rte_pktmbuf_data_len(m);
char* buf = (char*)malloc(len);
if (!buf) {
return packet(fragment{rte_pktmbuf_mtod(m, char*), len},
make_deleter(deleter(),
[m] { rte_pktmbuf_free(m); }));
} else {
rte_memcpy(buf, rte_pktmbuf_mtod(m, char*), len);
rte_pktmbuf_free(m);
return packet(fragment{buf, len}, make_free_deleter(buf));
}
} else {
return from_mbuf_lro(m);
}
}
template<>
inline packet dpdk_qp<true>::from_mbuf_lro(rte_mbuf* m)
{
_frags.clear();
_bufs.clear();
for (; m != nullptr; m = m->next) {
char* data = rte_pktmbuf_mtod(m, char*);
_frags.emplace_back(fragment{data, rte_pktmbuf_data_len(m)});
_bufs.push_back(data);
}
return packet(_frags.begin(), _frags.end(),
make_deleter(deleter(),
[bufs_vec = std::move(_bufs)] {
for (auto&& b : bufs_vec) {
free(b);
}
}));
}
template<>
inline packet dpdk_qp<true>::from_mbuf(rte_mbuf* m)
{
char* data = rte_pktmbuf_mtod(m, char*);
fragment f{data, rte_pktmbuf_data_len(m)};
packet p(f, make_free_deleter(data));
_rx_free_pkts.push_back(m);
_num_rx_free_segs += rte_mbuf_nb_segs(m);
return p;
if (!_dev->hw_features_ref().rx_lro || rte_pktmbuf_is_contiguous(m)) {
char* data = rte_pktmbuf_mtod(m, char*);
return packet(fragment{data, rte_pktmbuf_data_len(m)},
make_free_deleter(data));
} else {
return from_mbuf_lro(m);
}
}
template <bool HugetlbfsMemBackend>
inline bool dpdk_qp<HugetlbfsMemBackend>::refill_one_cluster(rte_mbuf* head)
{
while (head != NULL) {
struct rte_mbuf *m_next = head->next;
for (; head != nullptr; head = head->next) {
if (!refill_rx_mbuf(head)) {
//
// If we failed to allocate a new buffer - push the rest of the
@@ -1463,7 +1555,6 @@ inline bool dpdk_qp<HugetlbfsMemBackend>::refill_one_cluster(rte_mbuf* head)
return false;
}
_rx_free_bufs.push_back(head);
head = m_next;
}
return true;
@@ -1517,12 +1608,6 @@ void dpdk_qp<HugetlbfsMemBackend>::process_packets(
struct rte_mbuf *m = bufs[i];
offload_info oi;
// TODO: Remove this when implement LRO support
if (!rte_pktmbuf_is_contiguous(m)) {
rte_exit(EXIT_FAILURE,
"DPDK-Rx: Have got a fragmented buffer - not supported\n");
}
packet p = from_mbuf(m);
// Set stipped VLAN value if available
@@ -1631,7 +1716,8 @@ std::unique_ptr<qp> dpdk_device::init_local_queue(boost::program_options::variab
std::unique_ptr<net::device> create_dpdk_net_device(
uint8_t port_idx,
uint8_t num_queues)
uint8_t num_queues,
bool use_lro)
{
static bool called = false;
@@ -1647,7 +1733,7 @@ std::unique_ptr<net::device> create_dpdk_net_device(
printf("ports number: %d\n", rte_eth_dev_count());
}
return std::make_unique<dpdk::dpdk_device>(port_idx, num_queues);
return std::make_unique<dpdk::dpdk_device>(port_idx, num_queues, use_lro);
}
boost::program_options::options_description

View File

@@ -30,7 +30,8 @@
std::unique_ptr<net::device> create_dpdk_net_device(
uint8_t port_idx = 0,
uint8_t num_queues = 1);
uint8_t num_queues = 1,
bool use_lro = true);
boost::program_options::options_description get_dpdk_net_options_description();

View File

@@ -85,7 +85,8 @@ void create_native_net_device(boost::program_options::variables_map opts) {
if (opts.count("dpdk-pmd")) {
// Hardcoded port index 0.
// TODO: Inherit it from the opts
dev = create_dpdk_net_device(0, smp::count);
dev = create_dpdk_net_device(0, smp::count,
!(opts.count("lro") && opts["lro"].as<std::string>() == "off"));
} else
#endif
dev = create_virtio_net_device(opts);
@@ -325,6 +326,9 @@ boost::program_options::options_description nns_options() {
#ifdef HAVE_DPDK
("dpdk-pmd", "Use DPDK PMD drivers")
#endif
("lro",
boost::program_options::value<std::string>()->default_value("on"),
"Enable LRO")
;
add_native_net_options_description(opts);

View File

@@ -71,6 +71,8 @@ struct hw_features {
bool tx_csum_l4_offload = false;
// Enable rx checksum offload
bool rx_csum_offload = false;
// LRO is enabled
bool rx_lro = false;
// Enable tx TCP segment offload
bool tx_tso = false;
// Enable tx UDP fragmentation offload

View File

@@ -87,11 +87,18 @@ private:
}
if (!(_opts.count("tso") && _opts["tso"].as<std::string>() == "off")) {
seastar_supported_features |= VIRTIO_NET_F_HOST_TSO4;
seastar_supported_features |= VIRTIO_NET_F_GUEST_TSO4;
_hw_features.tx_tso = true;
} else {
_hw_features.tx_tso = false;
}
if (!(_opts.count("lro") && _opts["lro"].as<std::string>() == "off")) {
seastar_supported_features |= VIRTIO_NET_F_GUEST_TSO4;
_hw_features.rx_lro = true;
} else {
_hw_features.rx_lro = false;
}
if (!(_opts.count("ufo") && _opts["ufo"].as<std::string>() == "off")) {
seastar_supported_features |= VIRTIO_NET_F_HOST_UFO;
seastar_supported_features |= VIRTIO_NET_F_GUEST_UFO;