From 2cd52228b733368e8cd57075f444c53f113c1f1a Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Wed, 22 Dec 2021 12:20:33 +0000 Subject: [PATCH] Revert "Add oneDPL support for std-data and std-indices" This reverts commit 6ea09a9620674d1bf2626cacda582b7ec6256ee4. --- CMakeLists.txt | 2 +- src/std-data/STDDataStream.cpp | 41 ++++------- src/std-data/STDDataStream.h | 52 ++------------ src/std-data/model.cmake | 50 ------------- src/std-indices/STDIndicesStream.cpp | 51 +++++--------- src/std-indices/STDIndicesStream.h | 102 +++++++++------------------ src/std-indices/model.cmake | 50 ------------- 7 files changed, 74 insertions(+), 274 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index edf05f5..3730602 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(BabelStream VERSION 3.5 LANGUAGES CXX) # uncomment for debugging build issues: -set(CMAKE_VERBOSE_MAKEFILE ON) +#set(CMAKE_VERBOSE_MAKEFILE ON) # some nicer defaults for standard C++ set(CMAKE_CXX_EXTENSIONS OFF) diff --git a/src/std-data/STDDataStream.cpp b/src/std-data/STDDataStream.cpp index 0be23a3..a2628e1 100644 --- a/src/std-data/STDDataStream.cpp +++ b/src/std-data/STDDataStream.cpp @@ -5,31 +5,21 @@ // source code #include "STDDataStream.h" -#include + +#include +#include +#include + +// There are three execution policies: +// auto exe_policy = std::execution::seq; +// auto exe_policy = std::execution::par; +auto exe_policy = std::execution::par_unseq; + template -STDDataStream::STDDataStream(const int ARRAY_SIZE, int device) : array_size{ARRAY_SIZE}, -#if defined(ONEDPL_USE_DPCPP_BACKEND) - exe_policy(oneapi::dpl::execution::make_device_policy(cl::sycl::default_selector{})), - allocator(exe_policy.queue()), - a(array_size, allocator), b(array_size, allocator), c(array_size, allocator) -#else - a(array_size), b(array_size),c(array_size) -#endif +STDDataStream::STDDataStream(const int ARRAY_SIZE, int device) + noexcept : array_size{ARRAY_SIZE}, a(array_size), b(array_size), c(array_size) { -#if USE_ONEDPL - std::cout << "Using oneDPL backend: "; - #if defined(ONEDPL_USE_DPCPP_BACKEND) - std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info() << ")"; - #elif defined(ONEDPL_USE_TBB_BACKEND) - std::cout << "TBB"; - #elif defined(ONEDPL_USE_OPENMP_BACKEND) - std::cout << "OpenMP"; - #else - std::cout << "Default"; - #endif - std::cout << std::endl; -#endif } template @@ -43,10 +33,9 @@ void STDDataStream::init_arrays(T initA, T initB, T initC) template void STDDataStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) { - // operator = is deleted because h_* vectors may have different allocator type compared to ours - std::copy(a.begin(), a.end(), h_a.begin()); - std::copy(b.begin(), b.end(), h_b.begin()); - std::copy(c.begin(), c.end(), h_c.begin()); + h_a = a; + h_b = b; + h_c = c; } template diff --git a/src/std-data/STDDataStream.h b/src/std-data/STDDataStream.h index c9a1f43..f8bc302 100644 --- a/src/std-data/STDDataStream.h +++ b/src/std-data/STDDataStream.h @@ -6,35 +6,12 @@ #pragma once +#include +#include #include "Stream.h" #define IMPLEMENTATION_STRING "STD (data-oriented)" -#ifdef USE_ONEDPL - #define PSTL_USAGE_WARNINGS 1 - - #include - #include - #include - #include - #include - - #ifdef ONEDPL_USE_DPCPP_BACKEND - #include - #endif -#else - #include - #include - #include - - #if defined(ONEDPL_USE_DPCPP_BACKEND) || \ - defined(ONEDPL_USE_TBB_BACKEND) || \ - defined(ONEDPL_USE_OPENMP_BACKEND) - #error oneDPL missing (ONEDPL_VERSION_MAJOR not defined) but backend (ONEDPL_USE_*_BACKEND) specified - #endif - -#endif - template class STDDataStream : public Stream @@ -43,31 +20,14 @@ class STDDataStream : public Stream // Size of arrays int array_size; -#if defined(ONEDPL_USE_DPCPP_BACKEND) - // SYCL oneDPL backend - using ExecutionPolicy = oneapi::dpl::execution::device_policy<>; - using Allocator = sycl::usm_allocator; -#elif defined(USE_ONEDPL) - // every other non-SYCL oneDPL backend (i.e TBB, OMP) - using ExecutionPolicy = decltype(oneapi::dpl::execution::par_unseq); - using Allocator = std::allocator; -#else - // normal std execution policies - using ExecutionPolicy = decltype(std::execution::par_unseq); - using Allocator = std::allocator; -#endif - - ExecutionPolicy exe_policy{}; - Allocator allocator; - // Device side pointers - std::vector a; - std::vector b; - std::vector c; + std::vector a; + std::vector b; + std::vector c; public: - STDDataStream(const int, int); + STDDataStream(const int, int) noexcept; ~STDDataStream() = default; virtual void copy() override; diff --git a/src/std-data/model.cmake b/src/std-data/model.cmake index 2454295..ef69f30 100644 --- a/src/std-data/model.cmake +++ b/src/std-data/model.cmake @@ -19,30 +19,9 @@ register_flag_optional(NVHPC_OFFLOAD ccall - Compile for all supported compute capabilities" "") -register_flag_optional(ONEDPL_OFFLOAD - "Use the DPC++ oneDPL library which supports STL algorithms on SYCL, TBB, and OpenMP. - This option only supports the oneDPL library shipped with oneAPI, and must use the dpcpp - compiler (i.e -DCMAKE_CXX_COMPILER=dpcpp) for this option. - Make sure your oneAPI installation includes at least the following components: dpcpp, onedpl, onetbb. - The env. variable `TBBROOT` needs to point to the base directory of your TBB install (e.g /opt/intel/oneapi/tbb/latest/). - This should be done by oneAPI's `setvars.sh` script automatically. - - Possible values are: - TBB - Use the TBB backend, the correct TBB library will be linked from oneAPI automatically. - OMP - Use the OpenMP backend - DPCPP - Use the SYCL (via dpcpp) backend with the default selector. - See https://intel.github.io/llvm-docs/EnvironmentVariables.html#sycl-device-filter - on selecting a non-default device or SYCL backend." - "") - - macro(setup) set(CMAKE_CXX_STANDARD 17) - if (NVHPC_OFFLOAD AND ONEDPL_OFFLOAD) - message(FATAL_ERROR "NVHPC_OFFLOAD and NVHPC_OFFLOAD are mutually exclusive") - endif () - if (NVHPC_OFFLOAD) set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD}) # propagate flags to linker so that it links with the gpu stuff as well @@ -51,33 +30,4 @@ macro(setup) endif () - if (ONEDPL_OFFLOAD) - set(CXX_EXTRA_FLAGS) - set(CXX_EXTRA_LIBRARIES /opt/intel/oneapi/tbb/2021.4.0/lib/intel64/gcc4.8/libtbb.so) - # propagate flags to linker so that it links with the gpu stuff as well - register_append_cxx_flags(ANY -fopenmp -fsycl-unnamed-lambda -fsycl) - - # XXX see https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-library-guide/top/oneapi-dpc-library-onedpl-overview.html - # this is to avoid the system TBB headers (if exists) from having precedence which isn't compatible with oneDPL's par implementation - register_definitions( - PSTL_USE_PARALLEL_POLICIES=0 - _GLIBCXX_USE_TBB_PAR_BACKEND=0 - ) - - register_definitions(USE_ONEDPL) - if (ONEDPL_OFFLOAD STREQUAL "TBB") - register_definitions(ONEDPL_USE_TBB_BACKEND=1) - elseif (ONEDPL_OFFLOAD STREQUAL "OPENMP") - register_definitions(ONEDPL_USE_OPENMP_BACKEND=1) - elseif (ONEDPL_OFFLOAD STREQUAL "SYCL") - register_definitions(ONEDPL_USE_DPCPP_BACKEND=1) - else () - message(FATAL_ERROR "Unsupported ONEDPL_OFFLOAD backend: ${ONEDPL_OFFLOAD}") - endif () - - # even with the workaround above, -ltbb may still end up with the wrong one, so be explicit here - register_link_library($ENV{TBBROOT}/lib/intel64/gcc4.8/libtbb.so) - - endif () - endmacro() diff --git a/src/std-indices/STDIndicesStream.cpp b/src/std-indices/STDIndicesStream.cpp index ceb9d3d..a88dd18 100644 --- a/src/std-indices/STDIndicesStream.cpp +++ b/src/std-indices/STDIndicesStream.cpp @@ -5,32 +5,21 @@ // source code #include "STDIndicesStream.h" -#include + +#include +#include +#include + +// There are three execution policies: +// auto exe_policy = std::execution::seq; +// auto exe_policy = std::execution::par; +auto exe_policy = std::execution::par_unseq; + template -STDIndicesStream::STDIndicesStream(const int ARRAY_SIZE, int device) : -array_size{ARRAY_SIZE}, range_start(0), range_end(array_size), -#if defined(ONEDPL_USE_DPCPP_BACKEND) -exe_policy(oneapi::dpl::execution::make_device_policy(cl::sycl::default_selector{})), - allocator(exe_policy.queue()), - a(array_size, allocator), b(array_size, allocator), c(array_size, allocator) -#else -a(array_size), b(array_size),c(array_size) -#endif +STDIndicesStream::STDIndicesStream(const int ARRAY_SIZE, int device) + noexcept : array_size{ARRAY_SIZE}, range(0, array_size), a(array_size), b(array_size), c(array_size) { -#if USE_ONEDPL - std::cout << "Using oneDPL backend: "; - #if defined(ONEDPL_USE_DPCPP_BACKEND) - std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info() << ")"; - #elif defined(ONEDPL_USE_TBB_BACKEND) - std::cout << "TBB"; - #elif defined(ONEDPL_USE_OPENMP_BACKEND) - std::cout << "OpenMP"; - #else - std::cout << "Default"; - #endif - std::cout << std::endl; -#endif } template @@ -44,13 +33,11 @@ void STDIndicesStream::init_arrays(T initA, T initB, T initC) template void STDIndicesStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) { - // operator = is deleted because h_* vectors may have different allocator type compared to ours - std::copy(a.begin(), a.end(), h_a.begin()); - std::copy(b.begin(), b.end(), h_b.begin()); - std::copy(c.begin(), c.end(), h_c.begin()); + h_a = a; + h_b = b; + h_c = c; } - template void STDIndicesStream::copy() { @@ -62,7 +49,7 @@ template void STDIndicesStream::mul() { // b[i] = scalar * c[i]; - std::transform(exe_policy, range_start, range_end, b.begin(), [&, scalar = startScalar](int i) { + std::transform(exe_policy, range.begin(), range.end(), b.begin(), [&, scalar = startScalar](int i) { return scalar * c[i]; }); } @@ -71,7 +58,7 @@ template void STDIndicesStream::add() { // c[i] = a[i] + b[i]; - std::transform(exe_policy, range_start, range_end, c.begin(), [&](int i) { + std::transform(exe_policy, range.begin(), range.end(), c.begin(), [&](int i) { return a[i] + b[i]; }); } @@ -80,7 +67,7 @@ template void STDIndicesStream::triad() { // a[i] = b[i] + scalar * c[i]; - std::transform(exe_policy, range_start, range_end, a.begin(), [&, scalar = startScalar](int i) { + std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) { return b[i] + scalar * c[i]; }); } @@ -92,7 +79,7 @@ void STDIndicesStream::nstream() // Need to do in two stages with C++11 STL. // 1: a[i] += b[i] // 2: a[i] += scalar * c[i]; - std::transform(exe_policy, range_start, range_end, a.begin(), [&, scalar = startScalar](int i) { + std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) { return a[i] + b[i] + scalar * c[i]; }); } diff --git a/src/std-indices/STDIndicesStream.h b/src/std-indices/STDIndicesStream.h index dfb63f6..66c8bb0 100644 --- a/src/std-indices/STDIndicesStream.h +++ b/src/std-indices/STDIndicesStream.h @@ -6,62 +6,46 @@ #pragma once +#include +#include #include "Stream.h" #define IMPLEMENTATION_STRING "STD (index-oriented)" - -#ifdef USE_ONEDPL - #define PSTL_USAGE_WARNINGS 1 - - #include - #include - #include - #include - #include - - #ifdef ONEDPL_USE_DPCPP_BACKEND - #include - #endif -#else - #include - #include - #include - - #if defined(ONEDPL_USE_DPCPP_BACKEND) || \ - defined(ONEDPL_USE_TBB_BACKEND) || \ - defined(ONEDPL_USE_OPENMP_BACKEND) - #error oneDPL missing (ONEDPL_VERSION_MAJOR not defined) but backend (ONEDPL_USE_*_BACKEND) specified - #endif - -#endif - // A lightweight counting iterator which will be used by the STL algorithms // NB: C++ <= 17 doesn't have this built-in, and it's only added later in ranges-v3 (C++2a) which this // implementation doesn't target template -class ranged_iterator { - N num; +class ranged { + N from, to; public: - using difference_type = N; - using value_type = N; - using pointer = const N*; - using reference = const N&; - using iterator_category = std::random_access_iterator_tag; - explicit ranged_iterator(N _num = 0) : num(_num) {} + ranged(N from, N to ): from(from), to(to) {} + class iterator { + N num; + public: + using difference_type = N; + using value_type = N; + using pointer = const N*; + using reference = const N&; + using iterator_category = std::random_access_iterator_tag; + explicit iterator(N _num = 0) : num(_num) {} - ranged_iterator& operator++() { num++; return *this; } - ranged_iterator operator++(int) { ranged_iterator retval = *this; ++(*this); return retval; } - ranged_iterator operator+(const value_type v) const { return ranged_iterator(num + v); } + iterator& operator++() { num++; return *this; } + iterator operator++(int) { iterator retval = *this; ++(*this); return retval; } + iterator operator+(const value_type v) const { return iterator(num + v); } - bool operator==(ranged_iterator other) const { return num == other.num; } - bool operator!=(ranged_iterator other) const { return *this != other; } - bool operator<(ranged_iterator other) const { return num < other.num; } + bool operator==(iterator other) const { return num == other.num; } + bool operator!=(iterator other) const { return *this != other; } + bool operator<(iterator other) const { return num < other.num; } - reference operator*() const { return num;} - difference_type operator-(const ranged_iterator &it) const { return num - it.num; } - value_type operator[](const difference_type &i) const { return num + i; } + reference operator*() const { return num;} + difference_type operator-(const iterator &it) const { return num - it.num; } + value_type operator[](const difference_type &i) const { return num + i; } + + }; + iterator begin() { return iterator(from); } + iterator end() { return iterator(to >= from? to+1 : to-1); } }; template @@ -71,37 +55,17 @@ class STDIndicesStream : public Stream // Size of arrays int array_size; -#if defined(ONEDPL_USE_DPCPP_BACKEND) - // SYCL oneDPL backend - using ExecutionPolicy = oneapi::dpl::execution::device_policy<>; - using Allocator = sycl::usm_allocator; - using IteratorType = oneapi::dpl::counting_iterator; -#elif defined(USE_ONEDPL) - // every other non-SYCL oneDPL backend (i.e TBB, OMP) - using ExecutionPolicy = decltype(oneapi::dpl::execution::par_unseq); - using Allocator = std::allocator; - using IteratorType = oneapi::dpl::counting_iterator; -#else - // normal std execution policies - using ExecutionPolicy = decltype(std::execution::par_unseq); - using Allocator = std::allocator; - using IteratorType = ranged_iterator; -#endif - - IteratorType range_start; - IteratorType range_end; - - ExecutionPolicy exe_policy{}; - Allocator allocator; + // induction range + ranged range; // Device side pointers - std::vector a; - std::vector b; - std::vector c; + std::vector a; + std::vector b; + std::vector c; public: - STDIndicesStream(const int, int); + STDIndicesStream(const int, int) noexcept; ~STDIndicesStream() = default; virtual void copy() override; diff --git a/src/std-indices/model.cmake b/src/std-indices/model.cmake index 2454295..ef69f30 100644 --- a/src/std-indices/model.cmake +++ b/src/std-indices/model.cmake @@ -19,30 +19,9 @@ register_flag_optional(NVHPC_OFFLOAD ccall - Compile for all supported compute capabilities" "") -register_flag_optional(ONEDPL_OFFLOAD - "Use the DPC++ oneDPL library which supports STL algorithms on SYCL, TBB, and OpenMP. - This option only supports the oneDPL library shipped with oneAPI, and must use the dpcpp - compiler (i.e -DCMAKE_CXX_COMPILER=dpcpp) for this option. - Make sure your oneAPI installation includes at least the following components: dpcpp, onedpl, onetbb. - The env. variable `TBBROOT` needs to point to the base directory of your TBB install (e.g /opt/intel/oneapi/tbb/latest/). - This should be done by oneAPI's `setvars.sh` script automatically. - - Possible values are: - TBB - Use the TBB backend, the correct TBB library will be linked from oneAPI automatically. - OMP - Use the OpenMP backend - DPCPP - Use the SYCL (via dpcpp) backend with the default selector. - See https://intel.github.io/llvm-docs/EnvironmentVariables.html#sycl-device-filter - on selecting a non-default device or SYCL backend." - "") - - macro(setup) set(CMAKE_CXX_STANDARD 17) - if (NVHPC_OFFLOAD AND ONEDPL_OFFLOAD) - message(FATAL_ERROR "NVHPC_OFFLOAD and NVHPC_OFFLOAD are mutually exclusive") - endif () - if (NVHPC_OFFLOAD) set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD}) # propagate flags to linker so that it links with the gpu stuff as well @@ -51,33 +30,4 @@ macro(setup) endif () - if (ONEDPL_OFFLOAD) - set(CXX_EXTRA_FLAGS) - set(CXX_EXTRA_LIBRARIES /opt/intel/oneapi/tbb/2021.4.0/lib/intel64/gcc4.8/libtbb.so) - # propagate flags to linker so that it links with the gpu stuff as well - register_append_cxx_flags(ANY -fopenmp -fsycl-unnamed-lambda -fsycl) - - # XXX see https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-library-guide/top/oneapi-dpc-library-onedpl-overview.html - # this is to avoid the system TBB headers (if exists) from having precedence which isn't compatible with oneDPL's par implementation - register_definitions( - PSTL_USE_PARALLEL_POLICIES=0 - _GLIBCXX_USE_TBB_PAR_BACKEND=0 - ) - - register_definitions(USE_ONEDPL) - if (ONEDPL_OFFLOAD STREQUAL "TBB") - register_definitions(ONEDPL_USE_TBB_BACKEND=1) - elseif (ONEDPL_OFFLOAD STREQUAL "OPENMP") - register_definitions(ONEDPL_USE_OPENMP_BACKEND=1) - elseif (ONEDPL_OFFLOAD STREQUAL "SYCL") - register_definitions(ONEDPL_USE_DPCPP_BACKEND=1) - else () - message(FATAL_ERROR "Unsupported ONEDPL_OFFLOAD backend: ${ONEDPL_OFFLOAD}") - endif () - - # even with the workaround above, -ltbb may still end up with the wrong one, so be explicit here - register_link_library($ENV{TBBROOT}/lib/intel64/gcc4.8/libtbb.so) - - endif () - endmacro()