Add oneDPL for std models
This commit is contained in:
parent
5197a4e561
commit
dfb4eb06b2
@ -76,13 +76,15 @@ option(USE_TBB "Enable oneTBB library for *supported* models. Enabling this on m
|
|||||||
model on how this is used." OFF)
|
model on how this is used." OFF)
|
||||||
|
|
||||||
if (USE_TBB)
|
if (USE_TBB)
|
||||||
include(FetchContent)
|
|
||||||
FetchContent_Declare(
|
FetchContent_Declare(
|
||||||
TBB
|
TBB
|
||||||
GIT_REPOSITORY https://github.com/oneapi-src/oneTBB.git
|
GIT_REPOSITORY https://github.com/oneapi-src/oneTBB.git
|
||||||
GIT_TAG faaf43c4ab22cb4b4267d65d5e218fa58800eea8
|
GIT_TAG faaf43c4ab22cb4b4267d65d5e218fa58800eea8
|
||||||
)
|
)
|
||||||
# Not using FetchContent_MakeAvailable because we need EXCLUDE_FROM_ALL
|
# Don't fail builds on warning (TBB has -Wall while not being free of warnings from unused symbols...)
|
||||||
|
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||||
|
set(TBB_STRICT OFF)
|
||||||
|
# Not using FetchContent_MakeAvailable (CMake>= 3.14) because we need EXCLUDE_FROM_ALL
|
||||||
FetchContent_GetProperties(TBB)
|
FetchContent_GetProperties(TBB)
|
||||||
if (NOT TBB_POPULATED)
|
if (NOT TBB_POPULATED)
|
||||||
FetchContent_Populate(TBB)
|
FetchContent_Populate(TBB)
|
||||||
@ -90,6 +92,25 @@ if (USE_TBB)
|
|||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
# Opt-in switch for the in-tree oneDPL fetch guarded by `if (USE_ONEDPL)`.
# Declared under its own name so it does not re-declare USE_TBB.
option(USE_ONEDPL "Enable oneDPL library for *supported* models. Enabling this on models that
don't explicitly link against DPL is a no-op, see description of your selected
model on how this is used." OFF)
|
||||||
|
|
||||||
|
# Fetch oneDPL at configure time when USE_ONEDPL holds a truthy value.
# The checkout is added with EXCLUDE_FROM_ALL; onedpl_SOURCE_DIR remains set afterwards.
if (USE_ONEDPL)
    FetchContent_Declare(
            oneDPL
            GIT_REPOSITORY https://github.com/oneapi-src/oneDPL.git
            GIT_TAG oneDPL-2021.7.0-release
    )
    # Not using FetchContent_MakeAvailable (CMake>= 3.14) because we need EXCLUDE_FROM_ALL
    # FetchContent_GetProperties publishes its results under the lower-cased content name,
    # so the populated flag is onedpl_POPULATED (oneDPL_POPULATED is never set).
    FetchContent_GetProperties(oneDPL)
    if (NOT onedpl_POPULATED)
        FetchContent_Populate(oneDPL)
        add_subdirectory("${onedpl_SOURCE_DIR}" "${onedpl_BINARY_DIR}" EXCLUDE_FROM_ALL)
    endif ()
endif ()
|
||||||
|
|
||||||
|
|
||||||
# include our macros
|
# include our macros
|
||||||
include(cmake/register_models.cmake)
|
include(cmake/register_models.cmake)
|
||||||
|
|
||||||
@ -170,6 +191,7 @@ include_directories(src)
|
|||||||
add_executable(${EXE_NAME} ${IMPL_SOURCES} src/main.cpp)
|
add_executable(${EXE_NAME} ${IMPL_SOURCES} src/main.cpp)
|
||||||
target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES})
|
target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES})
|
||||||
target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS})
|
target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS})
|
||||||
|
target_include_directories(${EXE_NAME} PUBLIC ${IMPL_DIRECTORIES})
|
||||||
|
|
||||||
if (CXX_EXTRA_LIBRARIES)
|
if (CXX_EXTRA_LIBRARIES)
|
||||||
target_link_libraries(${EXE_NAME} PUBLIC ${CXX_EXTRA_LIBRARIES})
|
target_link_libraries(${EXE_NAME} PUBLIC ${CXX_EXTRA_LIBRARIES})
|
||||||
|
|||||||
@ -71,6 +71,10 @@ macro(register_definitions)
|
|||||||
list(APPEND IMPL_DEFINITIONS ${ARGN})
|
list(APPEND IMPL_DEFINITIONS ${ARGN})
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
|
macro(register_directories)
|
||||||
|
list(APPEND IMPL_DIRECTORIES ${ARGN})
|
||||||
|
endmacro()
|
||||||
|
|
||||||
macro(register_flag_required NAME DESCRIPTION)
|
macro(register_flag_required NAME DESCRIPTION)
|
||||||
list(APPEND CUSTOM_FLAGS_TRIPLE "${NAME}" "${DESCRIPTION}" ON "")
|
list(APPEND CUSTOM_FLAGS_TRIPLE "${NAME}" "${DESCRIPTION}" ON "")
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|||||||
27
cmake/shim_onedpl.cmake
Normal file
27
cmake/shim_onedpl.cmake
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
|
||||||
|
|
||||||
|
if (USE_ONEDPL)
|
||||||
|
# # XXX see https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-library-guide/top/oneapi-dpc-library-onedpl-overview.html
|
||||||
|
# # this is to avoid the system TBB headers (if exists) from having precedence which isn't compatible with oneDPL's par implementation
|
||||||
|
# register_definitions(
|
||||||
|
# PSTL_USE_PARALLEL_POLICIES=0
|
||||||
|
# _GLIBCXX_USE_TBB_PAR_BACKEND=0
|
||||||
|
# )
|
||||||
|
register_definitions(USE_ONEDPL)
|
||||||
|
if (USE_ONEDPL STREQUAL "TBB")
|
||||||
|
register_definitions(ONEDPL_USE_TBB_BACKEND=1)
|
||||||
|
# TBB will either be linked later (USE_TBB==ON) or via extra libraries, don't do anything here
|
||||||
|
elseif (USE_ONEDPL STREQUAL "OPENMP")
|
||||||
|
register_definitions(ONEDPL_USE_OPENMP_BACKEND=1)
|
||||||
|
# Link OpenMP via CMAKE
|
||||||
|
find_package(OpenMP REQUIRED)
|
||||||
|
register_link_library(OpenMP::OpenMP_CXX)
|
||||||
|
elseif (USE_ONEDPL STREQUAL "SYCL")
|
||||||
|
register_definitions(ONEDPL_USE_DPCPP_BACKEND=1)
|
||||||
|
# This needs a SYCL compiler, will fail if CXX doesn't support SYCL2020
|
||||||
|
register_append_cxx_flags(ANY -fsycl-unnamed-lambda -fsycl)
|
||||||
|
else ()
|
||||||
|
message(FATAL_ERROR "Unsupported USE_ONEDPL backend: ${USE_ONEDPL}, see USE_ONEDPL flag description for available values.")
|
||||||
|
endif ()
|
||||||
|
# register_directories appends every argument verbatim to the include list, so pass only
# the path (it takes no leading compiler selector such as ANY).
register_directories("${onedpl_SOURCE_DIR}/include")
|
||||||
|
endif ()
|
||||||
75
src/dpl_shim.h
Normal file
75
src/dpl_shim.h
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
|
#ifndef ALIGNMENT
|
||||||
|
#define ALIGNMENT (2*1024*1024) // 2MB
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_ONEDPL
|
||||||
|
|
||||||
|
// oneDPL C++17 PSTL
|
||||||
|
|
||||||
|
#include <oneapi/dpl/execution>
|
||||||
|
#include <oneapi/dpl/algorithm>
|
||||||
|
#include <oneapi/dpl/numeric>
|
||||||
|
|
||||||
|
#ifdef ONEDPL_USE_DPCPP_BACKEND
|
||||||
|
|
||||||
|
#include <CL/sycl.hpp>
|
||||||
|
|
||||||
|
const static auto exe_policy = oneapi::dpl::execution::device_policy<>{
|
||||||
|
oneapi::dpl::execution::make_device_policy(cl::sycl::default_selector{})
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T> using Allocator = sycl::usm_allocator<T, sycl::usm::alloc::shared>;
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
constexpr Allocator<T> alloc_vec() { return {exe_policy.queue()}; };
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
T *alloc_raw(size_t size) { return sycl::malloc_shared<T>(size, exe_policy.queue()); }
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
void dealloc_raw(T *ptr) { sycl::free(ptr, exe_policy.queue()); }
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// auto exe_policy = dpl::execution::seq;
|
||||||
|
// auto exe_policy = dpl::execution::par;
|
||||||
|
static constexpr auto exe_policy = dpl::execution::par_unseq;
|
||||||
|
#define USE_STD_PTR_ALLOC_DEALLOC
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Normal C++17 PSTL
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <execution>
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
|
// auto exe_policy = std::execution::seq;
|
||||||
|
// auto exe_policy = std::execution::par;
|
||||||
|
static constexpr auto exe_policy = std::execution::par_unseq;
|
||||||
|
#define USE_STD_PTR_ALLOC_DEALLOC
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_STD_PTR_ALLOC_DEALLOC
|
||||||
|
|
||||||
|
template<typename T> using Allocator = std::allocator<T>;
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
constexpr Allocator<T> alloc_vec() { return {}; };
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
T *alloc_raw(size_t size) { return (T *) aligned_alloc(ALIGNMENT, sizeof(T) * size); }
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
void dealloc_raw(T *ptr) { free(ptr); }
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -6,14 +6,6 @@
|
|||||||
|
|
||||||
#include "STDDataStream.h"
|
#include "STDDataStream.h"
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <execution>
|
|
||||||
#include <numeric>
|
|
||||||
|
|
||||||
#ifndef ALIGNMENT
|
|
||||||
#define ALIGNMENT (2*1024*1024) // 2MB
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef USE_VECTOR
|
#ifdef USE_VECTOR
|
||||||
#define BEGIN(x) (x).begin()
|
#define BEGIN(x) (x).begin()
|
||||||
#define END(x) (x).end()
|
#define END(x) (x).end()
|
||||||
@ -22,23 +14,39 @@
|
|||||||
#define END(x) ((x) + array_size)
|
#define END(x) ((x) + array_size)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// There are three execution policies:
|
|
||||||
// auto exe_policy = std::execution::seq;
|
|
||||||
// auto exe_policy = std::execution::par;
|
|
||||||
constexpr auto exe_policy = std::execution::par_unseq;
|
|
||||||
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device)
|
STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device)
|
||||||
noexcept : array_size{ARRAY_SIZE},
|
noexcept : array_size{ARRAY_SIZE},
|
||||||
#ifdef USE_VECTOR
|
#ifdef USE_VECTOR
|
||||||
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
|
a(ARRAY_SIZE, alloc_vec<T>()), b(ARRAY_SIZE, alloc_vec<T>()), c(ARRAY_SIZE, alloc_vec<T>())
|
||||||
#else
|
#else
|
||||||
a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
a(alloc_raw<T>(ARRAY_SIZE)), b(alloc_raw<T>(ARRAY_SIZE)), c(alloc_raw<T>(ARRAY_SIZE))
|
||||||
b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
|
||||||
c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE))
|
|
||||||
#endif
|
#endif
|
||||||
{ std::cout <<"Backing storage typeid: " << typeid(a).name() << std::endl; }
|
{
|
||||||
|
std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl;
|
||||||
|
#if USE_ONEDPL
|
||||||
|
std::cout << "Using oneDPL backend: ";
|
||||||
|
#if defined(ONEDPL_USE_DPCPP_BACKEND)
|
||||||
|
std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info<sycl::info::device::name>() << ")";
|
||||||
|
#elif defined(ONEDPL_USE_TBB_BACKEND)
|
||||||
|
std::cout << "TBB " TBB_VERSION_STRING;
|
||||||
|
#elif defined(ONEDPL_USE_OPENMP_BACKEND)
|
||||||
|
std::cout << "OpenMP";
|
||||||
|
#else
|
||||||
|
std::cout << "Default";
|
||||||
|
#endif
|
||||||
|
std::cout << std::endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
STDDataStream<T>::~STDDataStream() {
|
||||||
|
#ifndef USE_VECTOR
|
||||||
|
dealloc_raw(a);
|
||||||
|
dealloc_raw(b);
|
||||||
|
dealloc_raw(c);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void STDDataStream<T>::init_arrays(T initA, T initB, T initC)
|
void STDDataStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
|
|||||||
@ -5,6 +5,7 @@
|
|||||||
// source code
|
// source code
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include "dpl_shim.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
@ -22,7 +23,7 @@ class STDDataStream : public Stream<T>
|
|||||||
|
|
||||||
// Device side pointers
|
// Device side pointers
|
||||||
#ifdef USE_VECTOR
|
#ifdef USE_VECTOR
|
||||||
std::vector<T> a, b, c;
|
std::vector<T, Allocator<T>> a, b, c;
|
||||||
#else
|
#else
|
||||||
T *a, *b, *c;
|
T *a, *b, *c;
|
||||||
#endif
|
#endif
|
||||||
@ -30,7 +31,7 @@ class STDDataStream : public Stream<T>
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
STDDataStream(const int, int) noexcept;
|
STDDataStream(const int, int) noexcept;
|
||||||
~STDDataStream() = default;
|
~STDDataStream();
|
||||||
|
|
||||||
virtual void copy() override;
|
virtual void copy() override;
|
||||||
virtual void add() override;
|
virtual void add() override;
|
||||||
|
|||||||
@ -27,9 +27,23 @@ register_flag_optional(USE_TBB
|
|||||||
"No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details."
|
"No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details."
|
||||||
"OFF")
|
"OFF")
|
||||||
|
|
||||||
|
register_flag_optional(USE_ONEDPL
|
||||||
|
"Link oneDPL which implements C++17 executor policies (via execution_policy_tag) for different backends.
|
||||||
|
|
||||||
|
Possible values are:
|
||||||
|
OPENMP - Implements policies using OpenMP.
|
||||||
|
CMake will handle any flags needed to enable OpenMP if the compiler supports it.
|
||||||
|
TBB - Implements policies using TBB.
|
||||||
|
TBB must be linked via USE_TBB or be available in LD_LIBRARY_PATH.
|
||||||
|
SYCL - Implements policies through SYCL2020.
|
||||||
|
This requires the DPC++ compiler (other SYCL compilers are untested), required SYCL flags are added automatically."
|
||||||
|
"OFF")
|
||||||
|
|
||||||
macro(setup)
|
macro(setup)
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
|
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/shim_onedpl.cmake)
|
||||||
|
|
||||||
if (NVHPC_OFFLOAD)
|
if (NVHPC_OFFLOAD)
|
||||||
set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
|
set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
|
||||||
# propagate flags to linker so that it links with the gpu stuff as well
|
# propagate flags to linker so that it links with the gpu stuff as well
|
||||||
|
|||||||
@ -22,22 +22,39 @@
|
|||||||
#define END(x) ((x) + array_size)
|
#define END(x) ((x) + array_size)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// There are three execution policies:
|
|
||||||
// auto exe_policy = std::execution::seq;
|
|
||||||
// auto exe_policy = std::execution::par;
|
|
||||||
constexpr auto exe_policy = std::execution::par_unseq;
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device)
|
STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device)
|
||||||
noexcept : array_size{ARRAY_SIZE}, range(0, array_size),
|
noexcept : array_size{ARRAY_SIZE}, range(0, array_size),
|
||||||
#ifdef USE_VECTOR
|
#ifdef USE_VECTOR
|
||||||
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
|
a(ARRAY_SIZE, alloc_vec<T>()), b(ARRAY_SIZE, alloc_vec<T>()), c(ARRAY_SIZE, alloc_vec<T>())
|
||||||
#else
|
#else
|
||||||
a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
a(alloc_raw<T>(ARRAY_SIZE)), b(alloc_raw<T>(ARRAY_SIZE)), c(alloc_raw<T>(ARRAY_SIZE))
|
||||||
b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
|
||||||
c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE))
|
|
||||||
#endif
|
#endif
|
||||||
{ std::cout <<"Backing storage typeid: " << typeid(a).name() << std::endl; }
|
{
|
||||||
|
std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl;
|
||||||
|
#if USE_ONEDPL
|
||||||
|
std::cout << "Using oneDPL backend: ";
|
||||||
|
#if defined(ONEDPL_USE_DPCPP_BACKEND)
|
||||||
|
std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info<sycl::info::device::name>() << ")";
|
||||||
|
#elif defined(ONEDPL_USE_TBB_BACKEND)
|
||||||
|
std::cout << "TBB " TBB_VERSION_STRING;
|
||||||
|
#elif defined(ONEDPL_USE_OPENMP_BACKEND)
|
||||||
|
std::cout << "OpenMP";
|
||||||
|
#else
|
||||||
|
std::cout << "Default";
|
||||||
|
#endif
|
||||||
|
std::cout << std::endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
STDIndicesStream<T>::~STDIndicesStream() {
|
||||||
|
#ifndef USE_VECTOR
|
||||||
|
dealloc_raw(a);
|
||||||
|
dealloc_raw(b);
|
||||||
|
dealloc_raw(c);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void STDIndicesStream<T>::init_arrays(T initA, T initB, T initC)
|
void STDIndicesStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
|
|||||||
@ -5,19 +5,14 @@
|
|||||||
// source code
|
// source code
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include "dpl_shim.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include "Stream.h"
|
#include "Stream.h"
|
||||||
|
|
||||||
#ifdef USE_SPAN
|
|
||||||
#include <span>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#define IMPLEMENTATION_STRING "STD (index-oriented)"
|
#define IMPLEMENTATION_STRING "STD (index-oriented)"
|
||||||
|
|
||||||
|
|
||||||
// A lightweight counting iterator which will be used by the STL algorithms
|
// A lightweight counting iterator which will be used by the STL algorithms
|
||||||
// NB: C++ <= 17 doesn't have this built-in, and it's only added later in ranges-v3 (C++2a) which this
|
// NB: C++ <= 17 doesn't have this built-in, and it's only added later in ranges-v3 (C++2a) which this
|
||||||
// implementation doesn't target
|
// implementation doesn't target
|
||||||
@ -78,7 +73,7 @@ class STDIndicesStream : public Stream<T>
|
|||||||
|
|
||||||
// Device side pointers
|
// Device side pointers
|
||||||
#ifdef USE_VECTOR
|
#ifdef USE_VECTOR
|
||||||
std::vector<T> a, b, c;
|
std::vector<T, Allocator<T>> a, b, c;
|
||||||
#else
|
#else
|
||||||
T *a, *b, *c;
|
T *a, *b, *c;
|
||||||
#endif
|
#endif
|
||||||
@ -86,7 +81,7 @@ class STDIndicesStream : public Stream<T>
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
STDIndicesStream(const int, int) noexcept;
|
STDIndicesStream(const int, int) noexcept;
|
||||||
~STDIndicesStream() = default;
|
~STDIndicesStream();
|
||||||
|
|
||||||
virtual void copy() override;
|
virtual void copy() override;
|
||||||
virtual void add() override;
|
virtual void add() override;
|
||||||
|
|||||||
@ -27,10 +27,21 @@ register_flag_optional(USE_TBB
|
|||||||
"Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details."
|
"Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details."
|
||||||
"OFF")
|
"OFF")
|
||||||
|
|
||||||
|
register_flag_optional(USE_ONEDPL
|
||||||
|
"Link oneDPL which implements C++17 executor policies (via execution_policy_tag) for different backends.
|
||||||
|
|
||||||
|
Possible values are:
|
||||||
|
OPENMP - Implements policies using OpenMP.
|
||||||
|
CMake will handle any flags needed to enable OpenMP if the compiler supports it.
|
||||||
|
TBB - Implements policies using TBB.
|
||||||
|
TBB must be linked via USE_TBB or be available in LD_LIBRARY_PATH.
|
||||||
|
SYCL - Implements policies through SYCL2020.
|
||||||
|
This requires the DPC++ compiler (other SYCL compilers are untested), required SYCL flags are added automatically."
|
||||||
|
"OFF")
|
||||||
|
|
||||||
macro(setup)
|
macro(setup)
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/shim_onedpl.cmake)
|
||||||
if (NVHPC_OFFLOAD)
|
if (NVHPC_OFFLOAD)
|
||||||
set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
|
set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
|
||||||
# propagate flags to linker so that it links with the gpu stuff as well
|
# propagate flags to linker so that it links with the gpu stuff as well
|
||||||
|
|||||||
@ -22,22 +22,39 @@
|
|||||||
#define END(x) ((x) + array_size)
|
#define END(x) ((x) + array_size)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// There are three execution policies:
|
|
||||||
// auto exe_policy = std::execution::seq;
|
|
||||||
// auto exe_policy = std::execution::par;
|
|
||||||
constexpr auto exe_policy = std::execution::par_unseq;
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
STDRangesStream<T>::STDRangesStream(const int ARRAY_SIZE, int device)
|
STDRangesStream<T>::STDRangesStream(const int ARRAY_SIZE, int device)
|
||||||
: array_size{ARRAY_SIZE},
|
noexcept : array_size{ARRAY_SIZE},
|
||||||
#ifdef USE_VECTOR
|
#ifdef USE_VECTOR
|
||||||
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
|
a(ARRAY_SIZE, alloc_vec<T>()), b(ARRAY_SIZE, alloc_vec<T>()), c(ARRAY_SIZE, alloc_vec<T>())
|
||||||
#else
|
#else
|
||||||
a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
a(alloc_raw<T>(ARRAY_SIZE)), b(alloc_raw<T>(ARRAY_SIZE)), c(alloc_raw<T>(ARRAY_SIZE))
|
||||||
b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
|
||||||
c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE))
|
|
||||||
#endif
|
#endif
|
||||||
{ std::cout <<"Backing storage typeid: " << typeid(a).name() << std::endl; }
|
{
|
||||||
|
std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl;
|
||||||
|
#if USE_ONEDPL
|
||||||
|
std::cout << "Using oneDPL backend: ";
|
||||||
|
#if defined(ONEDPL_USE_DPCPP_BACKEND)
|
||||||
|
std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info<sycl::info::device::name>() << ")";
|
||||||
|
#elif defined(ONEDPL_USE_TBB_BACKEND)
|
||||||
|
std::cout << "TBB " TBB_VERSION_STRING;
|
||||||
|
#elif defined(ONEDPL_USE_OPENMP_BACKEND)
|
||||||
|
std::cout << "OpenMP";
|
||||||
|
#else
|
||||||
|
std::cout << "Default";
|
||||||
|
#endif
|
||||||
|
std::cout << std::endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
STDRangesStream<T>::~STDRangesStream() {
|
||||||
|
#ifndef USE_VECTOR
|
||||||
|
dealloc_raw(a);
|
||||||
|
dealloc_raw(b);
|
||||||
|
dealloc_raw(c);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void STDRangesStream<T>::init_arrays(T initA, T initB, T initC)
|
void STDRangesStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
|
|||||||
@ -5,10 +5,10 @@
|
|||||||
// source code
|
// source code
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include "dpl_shim.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <stdexcept>
|
||||||
|
|
||||||
#include "Stream.h"
|
#include "Stream.h"
|
||||||
|
|
||||||
#define IMPLEMENTATION_STRING "STD C++ ranges"
|
#define IMPLEMENTATION_STRING "STD C++ ranges"
|
||||||
@ -22,14 +22,14 @@ class STDRangesStream : public Stream<T>
|
|||||||
|
|
||||||
// Device side pointers
|
// Device side pointers
|
||||||
#ifdef USE_VECTOR
|
#ifdef USE_VECTOR
|
||||||
std::vector<T> a, b, c;
|
std::vector<T, Allocator<T>> a, b, c;
|
||||||
#else
|
#else
|
||||||
T *a, *b, *c;
|
T *a, *b, *c;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
public:
|
public:
|
||||||
STDRangesStream(const int, int);
|
STDRangesStream(const int, int) noexcept;
|
||||||
~STDRangesStream() = default;
|
~STDRangesStream();
|
||||||
|
|
||||||
virtual void copy() override;
|
virtual void copy() override;
|
||||||
virtual void add() override;
|
virtual void add() override;
|
||||||
|
|||||||
@ -11,6 +11,18 @@ register_flag_optional(USE_TBB
|
|||||||
"No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details."
|
"No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details."
|
||||||
"OFF")
|
"OFF")
|
||||||
|
|
||||||
|
register_flag_optional(USE_ONEDPL
|
||||||
|
"Link oneDPL which implements C++17 executor policies (via execution_policy_tag) for different backends.
|
||||||
|
|
||||||
|
Possible values are:
|
||||||
|
OPENMP - Implements policies using OpenMP.
|
||||||
|
CMake will handle any flags needed to enable OpenMP if the compiler supports it.
|
||||||
|
TBB - Implements policies using TBB.
|
||||||
|
TBB must be linked via USE_TBB or be available in LD_LIBRARY_PATH.
|
||||||
|
SYCL - Implements policies through SYCL2020.
|
||||||
|
This requires the DPC++ compiler (other SYCL compilers are untested), required SYCL flags are added automatically."
|
||||||
|
"OFF")
|
||||||
|
|
||||||
macro(setup)
|
macro(setup)
|
||||||
|
|
||||||
# TODO this needs to eventually be removed when CMake adds proper C++20 support or at least update the flag used here
|
# TODO this needs to eventually be removed when CMake adds proper C++20 support or at least update the flag used here
|
||||||
@ -21,6 +33,7 @@ macro(setup)
|
|||||||
unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default
|
unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default
|
||||||
# and append our own:
|
# and append our own:
|
||||||
register_append_cxx_flags(ANY -std=c++2a)
|
register_append_cxx_flags(ANY -std=c++2a)
|
||||||
|
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/shim_onedpl.cmake)
|
||||||
if (USE_VECTOR)
|
if (USE_VECTOR)
|
||||||
register_definitions(USE_VECTOR)
|
register_definitions(USE_VECTOR)
|
||||||
endif ()
|
endif ()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user