mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-19 16:15:07 +00:00
Merge branch 'zero-copy-tx-20' of github.com:cloudius-systems/seastar-dev
dpdk zero-copy tx, from Vlad: "This patch series introduces zero-copy Tx with the DPDK networking backend: - Split the dpdk_qp mempool into separate pools for Rx and Tx queues. - Configure the dpdk_qp mempools to use an external memory buffer when we can ensure pinning and virt2phys translation (currently only when running on top of hugetlbfs). - Properly divide the memory between seastar and DPDK when running on top of hugetlbfs. - Tx zero-copy itself. See more details in the PATCH7 description."
This commit is contained in:
@@ -229,7 +229,7 @@ if args.with_osv:
|
||||
if args.dpdk_target:
|
||||
args.user_cflags = (args.user_cflags +
|
||||
' -DHAVE_DPDK -I' +
|
||||
args.dpdk_target + '/include -Wno-error=literal-suffix -Wno-literal-suffix')
|
||||
args.dpdk_target + '/include -Wno-error=literal-suffix -Wno-literal-suffix -Wno-invalid-offsetof')
|
||||
libs += (' -L' + args.dpdk_target + '/lib ' +
|
||||
'-Wl,--whole-archive -lrte_pmd_bond -lrte_pmd_vmxnet3_uio -lrte_pmd_virtio_uio -lrte_pmd_i40e -lrte_pmd_ixgbe -lrte_pmd_e1000 -lrte_pmd_ring -Wl,--no-whole-archive -lrte_distributor -lrte_kni -lrte_pipeline -lrte_table -lrte_port -lrte_timer -lrte_hash -lrte_lpm -lrte_power -lrte_acl -lrte_meter -lrte_sched -lrte_kvargs -lrte_mbuf -lrte_ip_frag -lethdev -lrte_eal -lrte_malloc -lrte_mempool -lrte_ring -lrte_cmdline -lrte_cfgfile -lrt -lm -ldl')
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#ifdef HAVE_DPDK
|
||||
|
||||
#include "net/dpdk.hh"
|
||||
#include "core/dpdk_rte.hh"
|
||||
#include "util/conversions.hh"
|
||||
#include <experimental/optional>
|
||||
@@ -35,6 +36,18 @@ void eal::init(cpuset cpus, boost::program_options::variables_map opts)
|
||||
if (hugepages_path) {
|
||||
args.push_back(string2vector("--huge-dir"));
|
||||
args.push_back(string2vector(hugepages_path.value()));
|
||||
|
||||
//
|
||||
// We don't know what is going to be our networking configuration so we
|
||||
// assume there is going to be a queue per-CPU. Plus we'll give DPDK an extra
|
||||
// 64MB for "other stuff".
|
||||
//
|
||||
size_t size_MB = mem_size(cpus.count()) >> 20;
|
||||
std::stringstream size_MB_str;
|
||||
size_MB_str << size_MB;
|
||||
|
||||
args.push_back(string2vector("-m"));
|
||||
args.push_back(string2vector(size_MB_str.str()));
|
||||
} else if (!opts.count("dpdk-pmd")) {
|
||||
args.push_back(string2vector("--no-huge"));
|
||||
}
|
||||
@@ -61,6 +74,23 @@ void eal::init(cpuset cpus, boost::program_options::variables_map opts)
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
size_t eal::mem_size(int num_cpus)
|
||||
{
|
||||
size_t memsize = 0;
|
||||
//
|
||||
// PMD mempool memory:
|
||||
//
|
||||
// We don't know what is going to be our networking configuration so we
|
||||
// assume there is going to be a queue per-CPU.
|
||||
//
|
||||
memsize += num_cpus * qp_mempool_obj_size();
|
||||
|
||||
// Plus we'll give a DPDK 64MB for "other stuff".
|
||||
memsize += (64UL << 20);
|
||||
|
||||
return memsize;
|
||||
}
|
||||
|
||||
} // namespace dpdk
|
||||
|
||||
#endif // HAVE_DPDK
|
||||
|
||||
@@ -28,6 +28,9 @@
|
||||
#define rte_mbuf_nb_segs(m) ((m)->pkt.nb_segs)
|
||||
#define rte_mbuf_l2_len(m) ((m)->pkt.vlan_macip.f.l2_len)
|
||||
#define rte_mbuf_l3_len(m) ((m)->pkt.vlan_macip.f.l3_len)
|
||||
#define rte_mbuf_buf_addr(m) ((m)->pkt.buf_addr)
|
||||
#define rte_mbuf_buf_physaddr(m) ((m)->pkt.buf_physaddr)
|
||||
#define rte_mbuf_data_off(m) ((m)->pkt.data_off)
|
||||
|
||||
#else
|
||||
|
||||
@@ -39,6 +42,9 @@
|
||||
#define rte_mbuf_nb_segs(m) ((m)->nb_segs)
|
||||
#define rte_mbuf_l2_len(m) ((m)->l2_len)
|
||||
#define rte_mbuf_l3_len(m) ((m)->l3_len)
|
||||
#define rte_mbuf_buf_addr(m) ((m)->buf_addr)
|
||||
#define rte_mbuf_buf_physaddr(m) ((m)->buf_physaddr)
|
||||
#define rte_mbuf_data_off(m) ((m)->data_off)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -52,6 +58,13 @@ public:
|
||||
using cpuset = std::bitset<RTE_MAX_LCORE>;
|
||||
|
||||
static void init(cpuset cpus, boost::program_options::variables_map opts);
|
||||
/**
|
||||
* Returns the amount of memory needed for DPDK
|
||||
* @param num_cpus Number of CPUs the application is going to use
|
||||
*
|
||||
* @return The amount of memory DPDK needs, in bytes
|
||||
*/
|
||||
static size_t mem_size(int num_cpus);
|
||||
static bool initialized;
|
||||
};
|
||||
|
||||
|
||||
@@ -56,9 +56,6 @@
|
||||
|
||||
namespace memory {
|
||||
|
||||
static constexpr const size_t page_bits = 12;
|
||||
static constexpr const size_t page_size = 1 << page_bits;
|
||||
static constexpr const size_t huge_page_size = 512 * page_size;
|
||||
static constexpr const unsigned cpu_id_shift = 36; // FIXME: make dynamic
|
||||
static constexpr const unsigned max_cpus = 256;
|
||||
static constexpr const size_t cache_line_size = 64;
|
||||
|
||||
@@ -12,6 +12,11 @@
|
||||
|
||||
namespace memory {
|
||||
|
||||
// TODO: Use getpagesize() to learn the system page size at run time.
|
||||
static constexpr const size_t page_bits = 12;
|
||||
static constexpr const size_t page_size = 1 << page_bits; // 4K
|
||||
static constexpr const size_t huge_page_size = 512 * page_size; // 2M
|
||||
|
||||
void configure(std::vector<resource::memory> m,
|
||||
std::experimental::optional<std::string> hugetlbfs_path = {});
|
||||
|
||||
|
||||
@@ -1268,6 +1268,26 @@ void smp::configure(boost::program_options::variables_map configuration)
|
||||
resource::configuration rc;
|
||||
if (configuration.count("memory")) {
|
||||
rc.total_memory = parse_memory_size(configuration["memory"].as<std::string>());
|
||||
#ifdef HAVE_DPDK
|
||||
if (configuration.count("hugepages") &&
|
||||
!configuration["network-stack"].as<std::string>().compare("native") &&
|
||||
configuration.count("dpdk-pmd")) {
|
||||
size_t dpdk_memory = dpdk::eal::mem_size(smp::count);
|
||||
|
||||
if (dpdk_memory >= rc.total_memory) {
|
||||
std::cerr<<"Can't run with the given amount of memory: ";
|
||||
std::cerr<<configuration["memory"].as<std::string>();
|
||||
std::cerr<<". Consider giving more."<<std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//
|
||||
// Subtract the memory we are about to give to DPDK from the total
|
||||
// amount of memory we are allowed to use.
|
||||
//
|
||||
rc.total_memory.value() -= dpdk_memory;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (configuration.count("reserve-memory")) {
|
||||
rc.reserve_memory = parse_memory_size(configuration["reserve-memory"].as<std::string>());
|
||||
|
||||
1038
net/dpdk.cc
1038
net/dpdk.cc
File diff suppressed because it is too large
Load Diff
@@ -17,6 +17,13 @@ std::unique_ptr<net::device> create_dpdk_net_device(
|
||||
|
||||
boost::program_options::options_description get_dpdk_net_options_description();
|
||||
|
||||
namespace dpdk {
|
||||
/**
|
||||
* @return Number of bytes needed for mempool objects of each QP.
|
||||
*/
|
||||
uint32_t qp_mempool_obj_size();
|
||||
}
|
||||
|
||||
#endif // _SEASTAR_DPDK_DEV_H
|
||||
|
||||
#endif // HAVE_DPDK
|
||||
|
||||
Reference in New Issue
Block a user