/* * Copyright (C) 2014 Cloudius Systems, Ltd. */ #ifndef PREFETCH_HH_ #define PREFETCH_HH_ #include #include #include #include "align.hh" static constexpr size_t cacheline_size = 64; template struct prefetcher; template struct prefetcher<0, RW, LOC> { prefetcher(uintptr_t ptr) {} }; template struct prefetcher { prefetcher(uintptr_t ptr) { __builtin_prefetch(reinterpret_cast(ptr), RW, LOC); std::atomic_signal_fence(std::memory_order_seq_cst); prefetcher(ptr + 64); } }; // LOC is a locality from __buitin_prefetch() gcc documentation: // "The value locality must be a compile-time constant integer between zero and three. A value of // zero means that the data has no temporal locality, so it need not be left in the cache after // the access. A value of three means that the data has a high degree of temporal locality and // should be left in all levels of cache possible. Values of one and two mean, respectively, a // low or moderate degree of temporal locality. The default is three." template void prefetch(T* ptr) { prefetcher(reinterpret_cast(ptr)); } template void prefetch(Iterator begin, Iterator end) { std::for_each(begin, end, [] (auto v) { prefetch(v); }); } template void prefetch_n(T** pptr) { boost::mpl::for_each< boost::mpl::range_c >( [pptr] (size_t x) { prefetch(*(pptr + x)); } ); } template void prefetch(void* ptr) { prefetcher(reinterpret_cast(ptr)); } template void prefetch_n(Iterator begin, Iterator end) { std::for_each(begin, end, [] (auto v) { prefetch(v); }); } template void prefetch_n(T** pptr) { boost::mpl::for_each< boost::mpl::range_c >( [pptr] (size_t x) { prefetch(*(pptr + x)); } ); } template void prefetchw(T* ptr) { prefetcher(reinterpret_cast(ptr)); } template void prefetchw_n(Iterator begin, Iterator end) { std::for_each(begin, end, [] (auto v) { prefetchw(v); }); } template void prefetchw_n(T** pptr) { boost::mpl::for_each< boost::mpl::range_c >( [pptr] (size_t x) { prefetchw(*(pptr + x)); } ); } template void prefetchw(void* ptr) { prefetcher(reinterpret_cast(ptr)); } template void prefetchw_n(Iterator begin, Iterator end) { std::for_each(begin, end, [] (auto v) { prefetchw(v); }); } template void prefetchw_n(T** pptr) { boost::mpl::for_each< boost::mpl::range_c >( [pptr] (size_t x) { prefetchw(*(pptr + x)); } ); } #endif