From dfb4eb06b24245727c37e1356a3f0e73f2b83db0 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Thu, 28 Jul 2022 15:03:26 +0100 Subject: [PATCH] Add oneDPL for std models --- CMakeLists.txt | 26 +++++++++- cmake/register_models.cmake | 4 ++ cmake/shim_onedpl.cmake | 27 ++++++++++ src/dpl_shim.h | 75 ++++++++++++++++++++++++++++ src/std-data/STDDataStream.cpp | 46 ++++++++++------- src/std-data/STDDataStream.h | 5 +- src/std-data/model.cmake | 18 ++++++- src/std-indices/STDIndicesStream.cpp | 39 +++++++++++---- src/std-indices/STDIndicesStream.h | 11 ++-- src/std-indices/model.cmake | 17 +++++-- src/std-ranges/STDRangesStream.cpp | 39 +++++++++++---- src/std-ranges/STDRangesStream.hpp | 10 ++-- src/std-ranges/model.cmake | 17 ++++++- 13 files changed, 269 insertions(+), 65 deletions(-) create mode 100644 cmake/shim_onedpl.cmake create mode 100644 src/dpl_shim.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 14bd39e..263555a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,13 +76,15 @@ option(USE_TBB "Enable oneTBB library for *supported* models. Enabling this on m model on how this is used." OFF) if (USE_TBB) - include(FetchContent) FetchContent_Declare( TBB GIT_REPOSITORY https://github.com/oneapi-src/oneTBB.git GIT_TAG faaf43c4ab22cb4b4267d65d5e218fa58800eea8 ) - # Not using FetchContent_MakeAvailable because we need EXCLUDE_FROM_ALL + # Don't fail builds on waring (TBB has -Wall while not being free of warnings from unused symbols...) + set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) + set(TBB_STRICT OFF) + # Not using FetchContent_MakeAvailable (CMake>= 3.14) because we need EXCLUDE_FROM_ALL FetchContent_GetProperties(TBB) if (NOT TBB_POPULATED) FetchContent_Populate(TBB) @@ -90,6 +92,25 @@ if (USE_TBB) endif () endif () +option(USE_TBB "Enable oneDPL library for *supported* models. Enabling this on models that + don't explicitly link against DPL is a no-op, see description of your selected + model on how this is used." OFF) + +if (USE_ONEDPL) + FetchContent_Declare( + oneDPL + GIT_REPOSITORY https://github.com/oneapi-src/oneDPL.git + GIT_TAG oneDPL-2021.7.0-release + ) + # Not using FetchContent_MakeAvailable (CMake>= 3.14) because we need EXCLUDE_FROM_ALL + FetchContent_GetProperties(oneDPL) + if (NOT oneDPL_POPULATED) + FetchContent_Populate(oneDPL) + add_subdirectory(${onedpl_SOURCE_DIR} ${onedpl_BINARY_DIR} EXCLUDE_FROM_ALL) + endif () +endif() + + # include our macros include(cmake/register_models.cmake) @@ -170,6 +191,7 @@ include_directories(src) add_executable(${EXE_NAME} ${IMPL_SOURCES} src/main.cpp) target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES}) target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS}) +target_include_directories(${EXE_NAME} PUBLIC ${IMPL_DIRECTORIES}) if (CXX_EXTRA_LIBRARIES) target_link_libraries(${EXE_NAME} PUBLIC ${CXX_EXTRA_LIBRARIES}) diff --git a/cmake/register_models.cmake b/cmake/register_models.cmake index f180c03..9432313 100644 --- a/cmake/register_models.cmake +++ b/cmake/register_models.cmake @@ -71,6 +71,10 @@ macro(register_definitions) list(APPEND IMPL_DEFINITIONS ${ARGN}) endmacro() +macro(register_directories) + list(APPEND IMPL_DIRECTORIES ${ARGN}) +endmacro() + macro(register_flag_required NAME DESCRIPTION) list(APPEND CUSTOM_FLAGS_TRIPLE "${NAME}" "${DESCRIPTION}" ON "") endmacro() diff --git a/cmake/shim_onedpl.cmake b/cmake/shim_onedpl.cmake new file mode 100644 index 0000000..861d069 --- /dev/null +++ b/cmake/shim_onedpl.cmake @@ -0,0 +1,27 @@ + + +if (USE_ONEDPL) + # # XXX see https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-library-guide/top/oneapi-dpc-library-onedpl-overview.html + # # this is to avoid the system TBB headers (if exists) from having precedence which isn't compatible with oneDPL's par implementation + # register_definitions( + # PSTL_USE_PARALLEL_POLICIES=0 + # _GLIBCXX_USE_TBB_PAR_BACKEND=0 + # ) + register_definitions(USE_ONEDPL) + if (USE_ONEDPL STREQUAL "TBB") + register_definitions(ONEDPL_USE_TBB_BACKEND=1) + # TBB will either be linked later (USE_TBB==ON) or via extra libraries, don't do anything here + elseif (USE_ONEDPL STREQUAL "OPENMP") + register_definitions(ONEDPL_USE_OPENMP_BACKEND=1) + # Link OpenMP via CMAKE + find_package(OpenMP REQUIRED) + register_link_library(OpenMP::OpenMP_CXX) + elseif (USE_ONEDPL STREQUAL "SYCL") + register_definitions(ONEDPL_USE_DPCPP_BACKEND=1) + # This needs a SYCL compiler, will fail if CXX doesn't SYCL2020 + register_append_cxx_flags(ANY -fsycl-unnamed-lambda -fsycl) + else () + message(FATAL_ERROR "Unsupported USE_ONEDPL backend: ${USE_ONEDPL}, see USE_ONEDPL flag description for available values.") + endif () + register_directories(ANY ${onedpl_SOURCE_DIR}/include) +endif () \ No newline at end of file diff --git a/src/dpl_shim.h b/src/dpl_shim.h new file mode 100644 index 0000000..89012a3 --- /dev/null +++ b/src/dpl_shim.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include + +#ifndef ALIGNMENT +#define ALIGNMENT (2*1024*1024) // 2MB +#endif + +#ifdef USE_ONEDPL + +// oneDPL C++17 PSTL + +#include +#include +#include + +#ifdef ONEDPL_USE_DPCPP_BACKEND + +#include + +const static auto exe_policy = oneapi::dpl::execution::device_policy<>{ + oneapi::dpl::execution::make_device_policy(cl::sycl::default_selector{}) +}; + +template using Allocator = sycl::usm_allocator; + +template +constexpr Allocator alloc_vec() { return {exe_policy.queue()}; }; + +template +T *alloc_raw(size_t size) { return sycl::malloc_shared(size, exe_policy.queue()); } + +template +void dealloc_raw(T *ptr) { sycl::free(ptr, exe_policy.queue()); } + +#else + +// auto exe_policy = dpl::execution::seq; +// auto exe_policy = dpl::execution::par; +static constexpr auto exe_policy = dpl::execution::par_unseq; +#define USE_STD_PTR_ALLOC_DEALLOC + +#endif + +#else + +// Normal C++17 PSTL + +#include +#include +#include + +// auto exe_policy = std::execution::seq; +// auto exe_policy = std::execution::par; +static constexpr auto exe_policy = std::execution::par_unseq; +#define USE_STD_PTR_ALLOC_DEALLOC + + +#endif + +#ifdef USE_STD_PTR_ALLOC_DEALLOC + +template using Allocator = std::allocator; + +template +constexpr Allocator alloc_vec() { return {}; }; + +template +T *alloc_raw(size_t size) { return (T *) aligned_alloc(ALIGNMENT, sizeof(T) * size); } + +template +void dealloc_raw(T *ptr) { free(ptr); } + +#endif diff --git a/src/std-data/STDDataStream.cpp b/src/std-data/STDDataStream.cpp index 34059f5..2bb6a33 100644 --- a/src/std-data/STDDataStream.cpp +++ b/src/std-data/STDDataStream.cpp @@ -6,14 +6,6 @@ #include "STDDataStream.h" -#include -#include -#include - -#ifndef ALIGNMENT -#define ALIGNMENT (2*1024*1024) // 2MB -#endif - #ifdef USE_VECTOR #define BEGIN(x) (x).begin() #define END(x) (x).end() @@ -22,23 +14,39 @@ #define END(x) ((x) + array_size) #endif -// There are three execution policies: -// auto exe_policy = std::execution::seq; -// auto exe_policy = std::execution::par; -constexpr auto exe_policy = std::execution::par_unseq; - - template STDDataStream::STDDataStream(const int ARRAY_SIZE, int device) noexcept : array_size{ARRAY_SIZE}, #ifdef USE_VECTOR - a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE) + a(ARRAY_SIZE, alloc_vec()), b(ARRAY_SIZE, alloc_vec()), c(ARRAY_SIZE, alloc_vec()) #else - a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)), - b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)), - c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)) + a(alloc_raw(ARRAY_SIZE)), b(alloc_raw(ARRAY_SIZE)), c(alloc_raw(ARRAY_SIZE)) #endif -{ std::cout <<"Backing storage typeid: " << typeid(a).name() << std::endl; } +{ + std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; +#if USE_ONEDPL + std::cout << "Using oneDPL backend: "; +#if defined(ONEDPL_USE_DPCPP_BACKEND) + std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info() << ")"; +#elif defined(ONEDPL_USE_TBB_BACKEND) + std::cout << "TBB " TBB_VERSION_STRING; +#elif defined(ONEDPL_USE_OPENMP_BACKEND) + std::cout << "OpenMP"; +#else + std::cout << "Default"; +#endif + std::cout << std::endl; +#endif +} + +template +STDDataStream::~STDDataStream() { +#ifndef USE_VECTOR + dealloc_raw(a); + dealloc_raw(b); + dealloc_raw(c); +#endif +} template void STDDataStream::init_arrays(T initA, T initB, T initC) diff --git a/src/std-data/STDDataStream.h b/src/std-data/STDDataStream.h index 84b4dcf..e50c95d 100644 --- a/src/std-data/STDDataStream.h +++ b/src/std-data/STDDataStream.h @@ -5,6 +5,7 @@ // source code #pragma once +#include "dpl_shim.h" #include #include @@ -22,7 +23,7 @@ class STDDataStream : public Stream // Device side pointers #ifdef USE_VECTOR - std::vector a, b, c; + std::vector> a, b, c; #else T *a, *b, *c; #endif @@ -30,7 +31,7 @@ class STDDataStream : public Stream public: STDDataStream(const int, int) noexcept; - ~STDDataStream() = default; + ~STDDataStream(); virtual void copy() override; virtual void add() override; diff --git a/src/std-data/model.cmake b/src/std-data/model.cmake index 3f79f13..f2fecba 100644 --- a/src/std-data/model.cmake +++ b/src/std-data/model.cmake @@ -27,18 +27,32 @@ register_flag_optional(USE_TBB "No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details." "OFF") +register_flag_optional(USE_ONEDPL + "Link oneDPL which implements C++17 executor policies (via execution_policy_tag) for different backends. + + Possible values are: + OPENMP - Implements policies using OpenMP. + CMake will handle any flags needed to enable OpenMP if the compiler supports it. + TBB - Implements policies using TBB. + TBB must be linked via USE_TBB or be available in LD_LIBRARY_PATH. + SYCL - Implements policies through SYCL2020. + This requires the DPC++ compiler (other SYCL compilers are untested), required SYCL flags are added automatically." + "OFF") + macro(setup) set(CMAKE_CXX_STANDARD 17) + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/shim_onedpl.cmake) + if (NVHPC_OFFLOAD) set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD}) # propagate flags to linker so that it links with the gpu stuff as well register_append_cxx_flags(ANY ${NVHPC_FLAGS}) register_append_link_flags(${NVHPC_FLAGS}) endif () - if(USE_VECTOR) + if (USE_VECTOR) register_definitions(USE_VECTOR) - endif() + endif () if (USE_TBB) register_link_library(TBB::tbb) endif () diff --git a/src/std-indices/STDIndicesStream.cpp b/src/std-indices/STDIndicesStream.cpp index d353777..4ec9977 100644 --- a/src/std-indices/STDIndicesStream.cpp +++ b/src/std-indices/STDIndicesStream.cpp @@ -22,22 +22,39 @@ #define END(x) ((x) + array_size) #endif -// There are three execution policies: -// auto exe_policy = std::execution::seq; -// auto exe_policy = std::execution::par; -constexpr auto exe_policy = std::execution::par_unseq; - template STDIndicesStream::STDIndicesStream(const int ARRAY_SIZE, int device) - noexcept : array_size{ARRAY_SIZE}, range(0, array_size), +noexcept : array_size{ARRAY_SIZE}, range(0, array_size), #ifdef USE_VECTOR - a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE) + a(ARRAY_SIZE, alloc_vec()), b(ARRAY_SIZE, alloc_vec()), c(ARRAY_SIZE, alloc_vec()) #else - a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)), - b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)), - c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)) + a(alloc_raw(ARRAY_SIZE)), b(alloc_raw(ARRAY_SIZE)), c(alloc_raw(ARRAY_SIZE)) #endif -{ std::cout <<"Backing storage typeid: " << typeid(a).name() << std::endl; } +{ + std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; +#if USE_ONEDPL + std::cout << "Using oneDPL backend: "; +#if defined(ONEDPL_USE_DPCPP_BACKEND) + std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info() << ")"; +#elif defined(ONEDPL_USE_TBB_BACKEND) + std::cout << "TBB " TBB_VERSION_STRING; +#elif defined(ONEDPL_USE_OPENMP_BACKEND) + std::cout << "OpenMP"; +#else + std::cout << "Default"; +#endif + std::cout << std::endl; +#endif +} + +template +STDIndicesStream::~STDIndicesStream() { +#ifndef USE_VECTOR + dealloc_raw(a); + dealloc_raw(b); + dealloc_raw(c); +#endif +} template void STDIndicesStream::init_arrays(T initA, T initB, T initC) diff --git a/src/std-indices/STDIndicesStream.h b/src/std-indices/STDIndicesStream.h index 3fd88f3..63254cd 100644 --- a/src/std-indices/STDIndicesStream.h +++ b/src/std-indices/STDIndicesStream.h @@ -5,19 +5,14 @@ // source code #pragma once +#include "dpl_shim.h" #include #include #include "Stream.h" -#ifdef USE_SPAN -#include -#endif - - #define IMPLEMENTATION_STRING "STD (index-oriented)" - // A lightweight counting iterator which will be used by the STL algorithms // NB: C++ <= 17 doesn't have this built-in, and it's only added later in ranges-v3 (C++2a) which this // implementation doesn't target @@ -78,7 +73,7 @@ class STDIndicesStream : public Stream // Device side pointers #ifdef USE_VECTOR - std::vector a, b, c; + std::vector> a, b, c; #else T *a, *b, *c; #endif @@ -86,7 +81,7 @@ class STDIndicesStream : public Stream public: STDIndicesStream(const int, int) noexcept; - ~STDIndicesStream() = default; + ~STDIndicesStream(); virtual void copy() override; virtual void add() override; diff --git a/src/std-indices/model.cmake b/src/std-indices/model.cmake index 7dc22b9..36e2ed8 100644 --- a/src/std-indices/model.cmake +++ b/src/std-indices/model.cmake @@ -27,19 +27,30 @@ register_flag_optional(USE_TBB "Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details." "OFF") +register_flag_optional(USE_ONEDPL + "Link oneDPL which implements C++17 executor policies (via execution_policy_tag) for different backends. + + Possible values are: + OPENMP - Implements policies using OpenMP. + CMake will handle any flags needed to enable OpenMP if the compiler supports it. + TBB - Implements policies using TBB. + TBB must be linked via USE_TBB or be available in LD_LIBRARY_PATH. + SYCL - Implements policies through SYCL2020. + This requires the DPC++ compiler (other SYCL compilers are untested), required SYCL flags are added automatically." + "OFF") macro(setup) set(CMAKE_CXX_STANDARD 17) - + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/shim_onedpl.cmake) if (NVHPC_OFFLOAD) set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD}) # propagate flags to linker so that it links with the gpu stuff as well register_append_cxx_flags(ANY ${NVHPC_FLAGS}) register_append_link_flags(${NVHPC_FLAGS}) endif () - if(USE_VECTOR) + if (USE_VECTOR) register_definitions(USE_VECTOR) - endif() + endif () if (USE_TBB) register_link_library(TBB::tbb) endif () diff --git a/src/std-ranges/STDRangesStream.cpp b/src/std-ranges/STDRangesStream.cpp index 356e6dc..29993bc 100644 --- a/src/std-ranges/STDRangesStream.cpp +++ b/src/std-ranges/STDRangesStream.cpp @@ -22,22 +22,39 @@ #define END(x) ((x) + array_size) #endif -// There are three execution policies: -// auto exe_policy = std::execution::seq; -// auto exe_policy = std::execution::par; -constexpr auto exe_policy = std::execution::par_unseq; - template STDRangesStream::STDRangesStream(const int ARRAY_SIZE, int device) - : array_size{ARRAY_SIZE}, +noexcept : array_size{ARRAY_SIZE}, #ifdef USE_VECTOR - a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE) + a(ARRAY_SIZE, alloc_vec()), b(ARRAY_SIZE, alloc_vec()), c(ARRAY_SIZE, alloc_vec()) #else - a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)), - b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)), - c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)) + a(alloc_raw(ARRAY_SIZE)), b(alloc_raw(ARRAY_SIZE)), c(alloc_raw(ARRAY_SIZE)) #endif -{ std::cout <<"Backing storage typeid: " << typeid(a).name() << std::endl; } +{ + std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; +#if USE_ONEDPL + std::cout << "Using oneDPL backend: "; +#if defined(ONEDPL_USE_DPCPP_BACKEND) + std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info() << ")"; +#elif defined(ONEDPL_USE_TBB_BACKEND) + std::cout << "TBB " TBB_VERSION_STRING; +#elif defined(ONEDPL_USE_OPENMP_BACKEND) + std::cout << "OpenMP"; +#else + std::cout << "Default"; +#endif + std::cout << std::endl; +#endif +} + +template +STDRangesStream::~STDRangesStream() { +#ifndef USE_VECTOR + dealloc_raw(a); + dealloc_raw(b); + dealloc_raw(c); +#endif +} template void STDRangesStream::init_arrays(T initA, T initB, T initC) diff --git a/src/std-ranges/STDRangesStream.hpp b/src/std-ranges/STDRangesStream.hpp index 33bc77b..21902c6 100644 --- a/src/std-ranges/STDRangesStream.hpp +++ b/src/std-ranges/STDRangesStream.hpp @@ -5,10 +5,10 @@ // source code #pragma once +#include "dpl_shim.h" #include -#include - +#include #include "Stream.h" #define IMPLEMENTATION_STRING "STD C++ ranges" @@ -22,14 +22,14 @@ class STDRangesStream : public Stream // Device side pointers #ifdef USE_VECTOR - std::vector a, b, c; + std::vector> a, b, c; #else T *a, *b, *c; #endif public: - STDRangesStream(const int, int); - ~STDRangesStream() = default; + STDRangesStream(const int, int) noexcept; + ~STDRangesStream(); virtual void copy() override; virtual void add() override; diff --git a/src/std-ranges/model.cmake b/src/std-ranges/model.cmake index 65e5489..2d90afc 100644 --- a/src/std-ranges/model.cmake +++ b/src/std-ranges/model.cmake @@ -11,6 +11,18 @@ register_flag_optional(USE_TBB "No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details." "OFF") +register_flag_optional(USE_ONEDPL + "Link oneDPL which implements C++17 executor policies (via execution_policy_tag) for different backends. + + Possible values are: + OPENMP - Implements policies using OpenMP. + CMake will handle any flags needed to enable OpenMP if the compiler supports it. + TBB - Implements policies using TBB. + TBB must be linked via USE_TBB or be available in LD_LIBRARY_PATH. + SYCL - Implements policies through SYCL2020. + This requires the DPC++ compiler (other SYCL compilers are untested), required SYCL flags are added automatically." + "OFF") + macro(setup) # TODO this needs to eventually be removed when CMake adds proper C++20 support or at least update the flag used here @@ -21,9 +33,10 @@ macro(setup) unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default # and append our own: register_append_cxx_flags(ANY -std=c++2a) - if(USE_VECTOR) + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/shim_onedpl.cmake) + if (USE_VECTOR) register_definitions(USE_VECTOR) - endif() + endif () if (USE_TBB) register_link_library(TBB::tbb) endif ()