Merge branch 'zero-copy-tx-20' of github.com:cloudius-systems/seastar-dev

dpdk zero-copy tx, from Vlad:

"This patch series introduces zero-copy Tx with DPDK networking backend:
 - Split the dpdk_qp mempool into separate pools for Rx and Tx queues.
 - Configure the dpdk_qp mempools to use an external memory buffer when we
   can ensure pinning and virt2phys translation (currently only when
   running on top of hugetlbfs).
 - Properly divide the memory between seastar and DPDK when running on
   top of hugetlbfs.
 - Tx zero-copy itself. See more details in the PATCH7 description."
This commit is contained in:
Avi Kivity
2015-02-12 11:56:46 +02:00
8 changed files with 891 additions and 227 deletions

View File

@@ -229,7 +229,7 @@ if args.with_osv:
if args.dpdk_target:
args.user_cflags = (args.user_cflags +
' -DHAVE_DPDK -I' +
args.dpdk_target + '/include -Wno-error=literal-suffix -Wno-literal-suffix')
args.dpdk_target + '/include -Wno-error=literal-suffix -Wno-literal-suffix -Wno-invalid-offsetof')
libs += (' -L' + args.dpdk_target + '/lib ' +
'-Wl,--whole-archive -lrte_pmd_bond -lrte_pmd_vmxnet3_uio -lrte_pmd_virtio_uio -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_pmd_e1000 -lrte_pmd_ring -Wl,--no-whole-archive -lrte_distributor -lrte_kni -lrte_pipeline -lrte_table -lrte_port -lrte_timer -lrte_hash -lrte_lpm -lrte_power -lrte_acl -lrte_meter -lrte_sched -lrte_kvargs -lrte_mbuf -lrte_ip_frag -lethdev -lrte_eal -lrte_malloc -lrte_mempool -lrte_ring -lrte_cmdline -lrte_cfgfile -lrt -lm -ldl')

View File

@@ -1,5 +1,6 @@
#ifdef HAVE_DPDK
#include "net/dpdk.hh"
#include "core/dpdk_rte.hh"
#include "util/conversions.hh"
#include <experimental/optional>
@@ -35,6 +36,18 @@ void eal::init(cpuset cpus, boost::program_options::variables_map opts)
if (hugepages_path) {
args.push_back(string2vector("--huge-dir"));
args.push_back(string2vector(hugepages_path.value()));
//
// We don't know what our networking configuration is going to be, so we
// assume there is going to be a queue per CPU. Plus we'll give DPDK
// 64MB for "other stuff".
//
size_t size_MB = mem_size(cpus.count()) >> 20;
std::stringstream size_MB_str;
size_MB_str << size_MB;
args.push_back(string2vector("-m"));
args.push_back(string2vector(size_MB_str.str()));
} else if (!opts.count("dpdk-pmd")) {
args.push_back(string2vector("--no-huge"));
}
@@ -61,6 +74,23 @@ void eal::init(cpuset cpus, boost::program_options::variables_map opts)
initialized = true;
}
// Estimate the amount of memory that must be handed to DPDK's EAL.
//
// @param num_cpus Number of CPUs the application is going to use; we
//                 assume one queue pair (and thus one set of mempools)
//                 per CPU, since the actual networking configuration is
//                 not known at this point.
//
// @return Required memory size in bytes: per-CPU QP mempool memory plus
//         a fixed 64MB reserve for DPDK's internal allocations.
size_t eal::mem_size(int num_cpus)
{
    size_t memsize = 0;
    //
    // PMD mempool memory:
    //
    // Widen to size_t BEFORE multiplying: qp_mempool_obj_size() returns
    // uint32_t, so an int * uint32_t product would be computed in 32 bits
    // and could silently overflow for large CPU counts / mempool sizes.
    //
    memsize += static_cast<size_t>(num_cpus) * qp_mempool_obj_size();
    // Plus give DPDK 64MB for "other stuff".
    memsize += (64UL << 20);
    return memsize;
}
} // namespace dpdk
#endif // HAVE_DPDK

View File

@@ -28,6 +28,9 @@
#define rte_mbuf_nb_segs(m) ((m)->pkt.nb_segs)
#define rte_mbuf_l2_len(m) ((m)->pkt.vlan_macip.f.l2_len)
#define rte_mbuf_l3_len(m) ((m)->pkt.vlan_macip.f.l3_len)
#define rte_mbuf_buf_addr(m) ((m)->pkt.buf_addr)
#define rte_mbuf_buf_physaddr(m) ((m)->pkt.buf_physaddr)
#define rte_mbuf_data_off(m) ((m)->pkt.data_off)
#else
@@ -39,6 +42,9 @@
#define rte_mbuf_nb_segs(m) ((m)->nb_segs)
#define rte_mbuf_l2_len(m) ((m)->l2_len)
#define rte_mbuf_l3_len(m) ((m)->l3_len)
#define rte_mbuf_buf_addr(m) ((m)->buf_addr)
#define rte_mbuf_buf_physaddr(m) ((m)->buf_physaddr)
#define rte_mbuf_data_off(m) ((m)->data_off)
#endif
@@ -52,6 +58,13 @@ public:
using cpuset = std::bitset<RTE_MAX_LCORE>;
static void init(cpuset cpus, boost::program_options::variables_map opts);
/**
* Returns the amount of memory needed for DPDK
* @param num_cpus Number of CPUs the application is going to use
*
* @return
*/
static size_t mem_size(int num_cpus);
static bool initialized;
};

View File

@@ -56,9 +56,6 @@
namespace memory {
static constexpr const size_t page_bits = 12;
static constexpr const size_t page_size = 1 << page_bits;
static constexpr const size_t huge_page_size = 512 * page_size;
static constexpr const unsigned cpu_id_shift = 36; // FIXME: make dynamic
static constexpr const unsigned max_cpus = 256;
static constexpr const size_t cache_line_size = 64;

View File

@@ -12,6 +12,11 @@
namespace memory {
// TODO: Use getpagesize() in order to learn a size of a system PAGE.
static constexpr const size_t page_bits = 12;
static constexpr const size_t page_size = 1 << page_bits; // 4K
static constexpr const size_t huge_page_size = 512 * page_size; // 2M
void configure(std::vector<resource::memory> m,
std::experimental::optional<std::string> hugetlbfs_path = {});

View File

@@ -1268,6 +1268,26 @@ void smp::configure(boost::program_options::variables_map configuration)
resource::configuration rc;
if (configuration.count("memory")) {
rc.total_memory = parse_memory_size(configuration["memory"].as<std::string>());
#ifdef HAVE_DPDK
if (configuration.count("hugepages") &&
!configuration["network-stack"].as<std::string>().compare("native") &&
configuration.count("dpdk-pmd")) {
size_t dpdk_memory = dpdk::eal::mem_size(smp::count);
if (dpdk_memory >= rc.total_memory) {
std::cerr<<"Can't run with the given amount of memory: ";
std::cerr<<configuration["memory"].as<std::string>();
std::cerr<<". Consider giving more."<<std::endl;
exit(1);
}
//
// Subtract the memory we are about to give to DPDK from the total
// amount of memory we are allowed to use.
//
rc.total_memory.value() -= dpdk_memory;
}
#endif
}
if (configuration.count("reserve-memory")) {
rc.reserve_memory = parse_memory_size(configuration["reserve-memory"].as<std::string>());

File diff suppressed because it is too large Load Diff

View File

@@ -17,6 +17,13 @@ std::unique_ptr<net::device> create_dpdk_net_device(
boost::program_options::options_description get_dpdk_net_options_description();
namespace dpdk {
/**
* @return Number of bytes needed for mempool objects of each QP.
*/
uint32_t qp_mempool_obj_size();
}
#endif // _SEASTAR_DPDK_DEV_H
#endif // HAVE_DPDK