From d77d0cddcaf4aa25f17e967b5c860646e793f774 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 15 Jan 2015 18:22:07 +0200 Subject: [PATCH 1/2] posix: implement pread() interface --- core/posix.hh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/posix.hh b/core/posix.hh index 18163e9a9c..1a8548e6a8 100644 --- a/core/posix.hh +++ b/core/posix.hh @@ -242,6 +242,11 @@ public: throw_system_error_on(r == -1); return { size_t(r) }; } + size_t pread(void* buf, size_t len, off_t off) { + auto r = ::pread(_fd, buf, len, off); + throw_system_error_on(r == -1); + return size_t(r); + } void timerfd_settime(int flags, const itimerspec& its) { auto r = ::timerfd_settime(_fd, flags, &its, NULL); throw_system_error_on(r == -1); From 80bcf3f973798686fe8b0f6c79ec931ef30c839e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 15 Jan 2015 18:23:19 +0200 Subject: [PATCH 2/2] memory: provide virt-to-phys translation API Convert virtual addresses to physical addresses. Only works when --hugepages is in effect. 
--- core/memory.cc | 61 ++++++++++++++++++++++++++++++++++++++++++++++++-- core/memory.hh | 16 +++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/core/memory.cc b/core/memory.cc index 0b7d55428a..31bc93cf96 100644 --- a/core/memory.cc +++ b/core/memory.cc @@ -80,7 +80,7 @@ using allocate_system_memory_fn namespace bi = boost::intrusive; inline -unsigned object_cpu_id(void* ptr) { +unsigned object_cpu_id(const void* ptr) { return (reinterpret_cast(ptr) >> cpu_id_shift) & 0xff; } @@ -262,6 +262,7 @@ struct cpu_pages { } fsu; small_pool_array small_pools; alignas(cache_line_size) std::atomic xcpu_freelist; + alignas(cache_line_size) std::vector virt_to_phys_map; static std::atomic cpu_id_gen; static cpu_pages* all_cpus[max_cpus]; char* mem() { return memory; } @@ -295,6 +296,8 @@ struct cpu_pages { void resize(size_t new_size, allocate_system_memory_fn alloc_sys_mem); void do_resize(size_t new_size, allocate_system_memory_fn alloc_sys_mem); void replace_memory_backing(allocate_system_memory_fn alloc_sys_mem); + void init_virt_to_phys_map(); + translation translate(const void* addr, size_t size); }; static thread_local cpu_pages cpu_mem; @@ -535,7 +538,7 @@ allocate_hugetlbfs_memory(file_desc& fd, optional where, size_t how_much) auto ret = fd.map( how_much, PROT_READ | PROT_WRITE, - MAP_SHARED | (where ? MAP_FIXED : 0), + MAP_SHARED | MAP_POPULATE | (where ? 
MAP_FIXED : 0), pos, where.value_or(nullptr)); return ret; @@ -555,6 +558,39 @@ void cpu_pages::replace_memory_backing(allocate_system_memory_fn alloc_sys_mem) std::memcpy(old_mem, relocated_old_mem.get(), bytes); } +void cpu_pages::init_virt_to_phys_map() { + auto nr_entries = nr_pages / (huge_page_size / page_size); + virt_to_phys_map.resize(nr_entries); + auto fd = file_desc::open("/proc/self/pagemap", O_RDONLY | O_CLOEXEC); + for (size_t i = 0; i != nr_entries; ++i) { + uint64_t entry = 0; + auto phys = std::numeric_limits::max(); + auto pfn = reinterpret_cast(mem() + i * huge_page_size) / page_size; + fd.pread(&entry, 8, pfn * 8); + if (entry & 0x8000'0000'0000'0000) { + phys = (entry & 0x003f'ffff'ffff'ffff) << page_bits; + } + virt_to_phys_map[i] = phys; + } +} + +translation +cpu_pages::translate(const void* addr, size_t size) { + auto a = reinterpret_cast(addr) - reinterpret_cast(mem()); + auto pfn = a / huge_page_size; + if (pfn >= virt_to_phys_map.size()) { + return {}; + } + auto phys = virt_to_phys_map[pfn]; + if (phys == std::numeric_limits::max()) { + return {}; + } + auto translation_size = align_up(a + 1, huge_page_size) - a; + size = std::min(size, translation_size); + phys += a & (huge_page_size - 1); + return translation{phys, size}; +} + void cpu_pages::do_resize(size_t new_size, allocate_system_memory_fn alloc_sys_mem) { auto new_pages = new_size / page_size; if (new_pages <= nr_pages) { @@ -798,6 +834,9 @@ void configure(std::vector m, cpu_mem.replace_memory_backing(sys_alloc); } cpu_mem.resize(total, sys_alloc); + if (hugetlbfs_path) { + cpu_mem.init_virt_to_phys_map(); + } size_t pos = 0; for (auto&& x : m) { #ifdef HAVE_NUMA @@ -820,6 +859,19 @@ bool drain_cross_cpu_freelist() { return cpu_mem.drain_cross_cpu_freelist(); } +translation +translate(const void* addr, size_t size) { + auto cpu_id = object_cpu_id(addr); + if (cpu_id >= max_cpus) { + return {}; + } + auto cp = cpu_pages::all_cpus[cpu_id]; + if (!cp) { + return {}; + } + return 
cp->translate(addr, size); +} + } using namespace memory; @@ -1066,6 +1118,11 @@ bool drain_cross_cpu_freelist() { return false; } +translation +translate(const void* addr, size_t size) { + return {}; +} + } #endif diff --git a/core/memory.hh b/core/memory.hh index 8fe4933b37..410da85a6b 100644 --- a/core/memory.hh +++ b/core/memory.hh @@ -40,6 +40,22 @@ bool drain_cross_cpu_freelist(); void set_reclaim_hook( std::function)> hook); +using physical_address = uint64_t; + +struct translation { + translation() = default; + translation(physical_address a, size_t s) : addr(a), size(s) {} + physical_address addr = 0; + size_t size = 0; +}; + +// Translate a virtual address range to a physical range. +// +// Can return a smaller range (in which case the remainder needs +// to be translated again), or a zero sized range in case the +// translation is not known. +translation translate(const void* addr, size_t size); + class statistics; statistics stats();