Revert "Add oneDPL support for std-data and std-indices"
This reverts commit 6ea09a9620.
This commit is contained in:
parent
6ea09a9620
commit
2cd52228b7
@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
|||||||
project(BabelStream VERSION 3.5 LANGUAGES CXX)
|
project(BabelStream VERSION 3.5 LANGUAGES CXX)
|
||||||
|
|
||||||
# uncomment for debugging build issues:
|
# uncomment for debugging build issues:
|
||||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
#set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||||
|
|
||||||
# some nicer defaults for standard C++
|
# some nicer defaults for standard C++
|
||||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||||
|
|||||||
@ -5,31 +5,21 @@
|
|||||||
// source code
|
// source code
|
||||||
|
|
||||||
#include "STDDataStream.h"
|
#include "STDDataStream.h"
|
||||||
#include <iostream>
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <execution>
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
|
// There are three execution policies:
|
||||||
|
// auto exe_policy = std::execution::seq;
|
||||||
|
// auto exe_policy = std::execution::par;
|
||||||
|
auto exe_policy = std::execution::par_unseq;
|
||||||
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device) : array_size{ARRAY_SIZE},
|
STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device)
|
||||||
#if defined(ONEDPL_USE_DPCPP_BACKEND)
|
noexcept : array_size{ARRAY_SIZE}, a(array_size), b(array_size), c(array_size)
|
||||||
exe_policy(oneapi::dpl::execution::make_device_policy(cl::sycl::default_selector{})),
|
|
||||||
allocator(exe_policy.queue()),
|
|
||||||
a(array_size, allocator), b(array_size, allocator), c(array_size, allocator)
|
|
||||||
#else
|
|
||||||
a(array_size), b(array_size),c(array_size)
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
#if USE_ONEDPL
|
|
||||||
std::cout << "Using oneDPL backend: ";
|
|
||||||
#if defined(ONEDPL_USE_DPCPP_BACKEND)
|
|
||||||
std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info<sycl::info::device::name>() << ")";
|
|
||||||
#elif defined(ONEDPL_USE_TBB_BACKEND)
|
|
||||||
std::cout << "TBB";
|
|
||||||
#elif defined(ONEDPL_USE_OPENMP_BACKEND)
|
|
||||||
std::cout << "OpenMP";
|
|
||||||
#else
|
|
||||||
std::cout << "Default";
|
|
||||||
#endif
|
|
||||||
std::cout << std::endl;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -43,10 +33,9 @@ void STDDataStream<T>::init_arrays(T initA, T initB, T initC)
|
|||||||
template <class T>
|
template <class T>
|
||||||
void STDDataStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
void STDDataStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||||
{
|
{
|
||||||
// operator = is deleted because h_* vectors may have different allocator type compared to ours
|
h_a = a;
|
||||||
std::copy(a.begin(), a.end(), h_a.begin());
|
h_b = b;
|
||||||
std::copy(b.begin(), b.end(), h_b.begin());
|
h_c = c;
|
||||||
std::copy(c.begin(), c.end(), h_c.begin());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
|||||||
@ -6,35 +6,12 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <stdexcept>
|
||||||
#include "Stream.h"
|
#include "Stream.h"
|
||||||
|
|
||||||
#define IMPLEMENTATION_STRING "STD (data-oriented)"
|
#define IMPLEMENTATION_STRING "STD (data-oriented)"
|
||||||
|
|
||||||
#ifdef USE_ONEDPL
|
|
||||||
#define PSTL_USAGE_WARNINGS 1
|
|
||||||
|
|
||||||
#include <oneapi/dpl/execution>
|
|
||||||
#include <oneapi/dpl/iterator>
|
|
||||||
#include <oneapi/dpl/algorithm>
|
|
||||||
#include <oneapi/dpl/memory>
|
|
||||||
#include <oneapi/dpl/numeric>
|
|
||||||
|
|
||||||
#ifdef ONEDPL_USE_DPCPP_BACKEND
|
|
||||||
#include <CL/sycl.hpp>
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#include <algorithm>
|
|
||||||
#include <execution>
|
|
||||||
#include <numeric>
|
|
||||||
|
|
||||||
#if defined(ONEDPL_USE_DPCPP_BACKEND) || \
|
|
||||||
defined(ONEDPL_USE_TBB_BACKEND) || \
|
|
||||||
defined(ONEDPL_USE_OPENMP_BACKEND)
|
|
||||||
#error oneDPL missing (ONEDPL_VERSION_MAJOR not defined) but backend (ONEDPL_USE_*_BACKEND) specified
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
class STDDataStream : public Stream<T>
|
class STDDataStream : public Stream<T>
|
||||||
@ -43,31 +20,14 @@ class STDDataStream : public Stream<T>
|
|||||||
// Size of arrays
|
// Size of arrays
|
||||||
int array_size;
|
int array_size;
|
||||||
|
|
||||||
#if defined(ONEDPL_USE_DPCPP_BACKEND)
|
|
||||||
// SYCL oneDPL backend
|
|
||||||
using ExecutionPolicy = oneapi::dpl::execution::device_policy<>;
|
|
||||||
using Allocator = sycl::usm_allocator<T, sycl::usm::alloc::shared>;
|
|
||||||
#elif defined(USE_ONEDPL)
|
|
||||||
// every other non-SYCL oneDPL backend (i.e TBB, OMP)
|
|
||||||
using ExecutionPolicy = decltype(oneapi::dpl::execution::par_unseq);
|
|
||||||
using Allocator = std::allocator<T>;
|
|
||||||
#else
|
|
||||||
// normal std execution policies
|
|
||||||
using ExecutionPolicy = decltype(std::execution::par_unseq);
|
|
||||||
using Allocator = std::allocator<T>;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ExecutionPolicy exe_policy{};
|
|
||||||
Allocator allocator;
|
|
||||||
|
|
||||||
// Device side pointers
|
// Device side pointers
|
||||||
std::vector<T, Allocator> a;
|
std::vector<T> a;
|
||||||
std::vector<T, Allocator> b;
|
std::vector<T> b;
|
||||||
std::vector<T, Allocator> c;
|
std::vector<T> c;
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
STDDataStream(const int, int);
|
STDDataStream(const int, int) noexcept;
|
||||||
~STDDataStream() = default;
|
~STDDataStream() = default;
|
||||||
|
|
||||||
virtual void copy() override;
|
virtual void copy() override;
|
||||||
|
|||||||
@ -19,30 +19,9 @@ register_flag_optional(NVHPC_OFFLOAD
|
|||||||
ccall - Compile for all supported compute capabilities"
|
ccall - Compile for all supported compute capabilities"
|
||||||
"")
|
"")
|
||||||
|
|
||||||
register_flag_optional(ONEDPL_OFFLOAD
|
|
||||||
"Use the DPC++ oneDPL library which supports STL algorithms on SYCL, TBB, and OpenMP.
|
|
||||||
This option only supports the oneDPL library shipped with oneAPI, and must use the dpcpp
|
|
||||||
compiler (i.e -DCMAKE_CXX_COMPILER=dpcpp) for this option.
|
|
||||||
Make sure your oneAPI installation includes at least the following components: dpcpp, onedpl, onetbb.
|
|
||||||
The env. variable `TBBROOT` needs to point to the base directory of your TBB install (e.g /opt/intel/oneapi/tbb/latest/).
|
|
||||||
This should be done by oneAPI's `setvars.sh` script automatically.
|
|
||||||
|
|
||||||
Possible values are:
|
|
||||||
TBB - Use the TBB backend, the correct TBB library will be linked from oneAPI automatically.
|
|
||||||
OMP - Use the OpenMP backend
|
|
||||||
DPCPP - Use the SYCL (via dpcpp) backend with the default selector.
|
|
||||||
See https://intel.github.io/llvm-docs/EnvironmentVariables.html#sycl-device-filter
|
|
||||||
on selecting a non-default device or SYCL backend."
|
|
||||||
"")
|
|
||||||
|
|
||||||
|
|
||||||
macro(setup)
|
macro(setup)
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
if (NVHPC_OFFLOAD AND ONEDPL_OFFLOAD)
|
|
||||||
message(FATAL_ERROR "NVHPC_OFFLOAD and NVHPC_OFFLOAD are mutually exclusive")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (NVHPC_OFFLOAD)
|
if (NVHPC_OFFLOAD)
|
||||||
set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
|
set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
|
||||||
# propagate flags to linker so that it links with the gpu stuff as well
|
# propagate flags to linker so that it links with the gpu stuff as well
|
||||||
@ -51,33 +30,4 @@ macro(setup)
|
|||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
||||||
if (ONEDPL_OFFLOAD)
|
|
||||||
set(CXX_EXTRA_FLAGS)
|
|
||||||
set(CXX_EXTRA_LIBRARIES /opt/intel/oneapi/tbb/2021.4.0/lib/intel64/gcc4.8/libtbb.so)
|
|
||||||
# propagate flags to linker so that it links with the gpu stuff as well
|
|
||||||
register_append_cxx_flags(ANY -fopenmp -fsycl-unnamed-lambda -fsycl)
|
|
||||||
|
|
||||||
# XXX see https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-library-guide/top/oneapi-dpc-library-onedpl-overview.html
|
|
||||||
# this is to avoid the system TBB headers (if exists) from having precedence which isn't compatible with oneDPL's par implementation
|
|
||||||
register_definitions(
|
|
||||||
PSTL_USE_PARALLEL_POLICIES=0
|
|
||||||
_GLIBCXX_USE_TBB_PAR_BACKEND=0
|
|
||||||
)
|
|
||||||
|
|
||||||
register_definitions(USE_ONEDPL)
|
|
||||||
if (ONEDPL_OFFLOAD STREQUAL "TBB")
|
|
||||||
register_definitions(ONEDPL_USE_TBB_BACKEND=1)
|
|
||||||
elseif (ONEDPL_OFFLOAD STREQUAL "OPENMP")
|
|
||||||
register_definitions(ONEDPL_USE_OPENMP_BACKEND=1)
|
|
||||||
elseif (ONEDPL_OFFLOAD STREQUAL "SYCL")
|
|
||||||
register_definitions(ONEDPL_USE_DPCPP_BACKEND=1)
|
|
||||||
else ()
|
|
||||||
message(FATAL_ERROR "Unsupported ONEDPL_OFFLOAD backend: ${ONEDPL_OFFLOAD}")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
# even with the workaround above, -ltbb may still end up with the wrong one, so be explicit here
|
|
||||||
register_link_library($ENV{TBBROOT}/lib/intel64/gcc4.8/libtbb.so)
|
|
||||||
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|||||||
@ -5,32 +5,21 @@
|
|||||||
// source code
|
// source code
|
||||||
|
|
||||||
#include "STDIndicesStream.h"
|
#include "STDIndicesStream.h"
|
||||||
#include <iostream>
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <execution>
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
|
// There are three execution policies:
|
||||||
|
// auto exe_policy = std::execution::seq;
|
||||||
|
// auto exe_policy = std::execution::par;
|
||||||
|
auto exe_policy = std::execution::par_unseq;
|
||||||
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device) :
|
STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device)
|
||||||
array_size{ARRAY_SIZE}, range_start(0), range_end(array_size),
|
noexcept : array_size{ARRAY_SIZE}, range(0, array_size), a(array_size), b(array_size), c(array_size)
|
||||||
#if defined(ONEDPL_USE_DPCPP_BACKEND)
|
|
||||||
exe_policy(oneapi::dpl::execution::make_device_policy(cl::sycl::default_selector{})),
|
|
||||||
allocator(exe_policy.queue()),
|
|
||||||
a(array_size, allocator), b(array_size, allocator), c(array_size, allocator)
|
|
||||||
#else
|
|
||||||
a(array_size), b(array_size),c(array_size)
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
#if USE_ONEDPL
|
|
||||||
std::cout << "Using oneDPL backend: ";
|
|
||||||
#if defined(ONEDPL_USE_DPCPP_BACKEND)
|
|
||||||
std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info<sycl::info::device::name>() << ")";
|
|
||||||
#elif defined(ONEDPL_USE_TBB_BACKEND)
|
|
||||||
std::cout << "TBB";
|
|
||||||
#elif defined(ONEDPL_USE_OPENMP_BACKEND)
|
|
||||||
std::cout << "OpenMP";
|
|
||||||
#else
|
|
||||||
std::cout << "Default";
|
|
||||||
#endif
|
|
||||||
std::cout << std::endl;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -44,13 +33,11 @@ void STDIndicesStream<T>::init_arrays(T initA, T initB, T initC)
|
|||||||
template <class T>
|
template <class T>
|
||||||
void STDIndicesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
void STDIndicesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||||
{
|
{
|
||||||
// operator = is deleted because h_* vectors may have different allocator type compared to ours
|
h_a = a;
|
||||||
std::copy(a.begin(), a.end(), h_a.begin());
|
h_b = b;
|
||||||
std::copy(b.begin(), b.end(), h_b.begin());
|
h_c = c;
|
||||||
std::copy(c.begin(), c.end(), h_c.begin());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void STDIndicesStream<T>::copy()
|
void STDIndicesStream<T>::copy()
|
||||||
{
|
{
|
||||||
@ -62,7 +49,7 @@ template <class T>
|
|||||||
void STDIndicesStream<T>::mul()
|
void STDIndicesStream<T>::mul()
|
||||||
{
|
{
|
||||||
// b[i] = scalar * c[i];
|
// b[i] = scalar * c[i];
|
||||||
std::transform(exe_policy, range_start, range_end, b.begin(), [&, scalar = startScalar](int i) {
|
std::transform(exe_policy, range.begin(), range.end(), b.begin(), [&, scalar = startScalar](int i) {
|
||||||
return scalar * c[i];
|
return scalar * c[i];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -71,7 +58,7 @@ template <class T>
|
|||||||
void STDIndicesStream<T>::add()
|
void STDIndicesStream<T>::add()
|
||||||
{
|
{
|
||||||
// c[i] = a[i] + b[i];
|
// c[i] = a[i] + b[i];
|
||||||
std::transform(exe_policy, range_start, range_end, c.begin(), [&](int i) {
|
std::transform(exe_policy, range.begin(), range.end(), c.begin(), [&](int i) {
|
||||||
return a[i] + b[i];
|
return a[i] + b[i];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -80,7 +67,7 @@ template <class T>
|
|||||||
void STDIndicesStream<T>::triad()
|
void STDIndicesStream<T>::triad()
|
||||||
{
|
{
|
||||||
// a[i] = b[i] + scalar * c[i];
|
// a[i] = b[i] + scalar * c[i];
|
||||||
std::transform(exe_policy, range_start, range_end, a.begin(), [&, scalar = startScalar](int i) {
|
std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) {
|
||||||
return b[i] + scalar * c[i];
|
return b[i] + scalar * c[i];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -92,7 +79,7 @@ void STDIndicesStream<T>::nstream()
|
|||||||
// Need to do in two stages with C++11 STL.
|
// Need to do in two stages with C++11 STL.
|
||||||
// 1: a[i] += b[i]
|
// 1: a[i] += b[i]
|
||||||
// 2: a[i] += scalar * c[i];
|
// 2: a[i] += scalar * c[i];
|
||||||
std::transform(exe_policy, range_start, range_end, a.begin(), [&, scalar = startScalar](int i) {
|
std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) {
|
||||||
return a[i] + b[i] + scalar * c[i];
|
return a[i] + b[i] + scalar * c[i];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,62 +6,46 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <stdexcept>
|
||||||
#include "Stream.h"
|
#include "Stream.h"
|
||||||
|
|
||||||
#define IMPLEMENTATION_STRING "STD (index-oriented)"
|
#define IMPLEMENTATION_STRING "STD (index-oriented)"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef USE_ONEDPL
|
|
||||||
#define PSTL_USAGE_WARNINGS 1
|
|
||||||
|
|
||||||
#include <oneapi/dpl/execution>
|
|
||||||
#include <oneapi/dpl/iterator>
|
|
||||||
#include <oneapi/dpl/algorithm>
|
|
||||||
#include <oneapi/dpl/memory>
|
|
||||||
#include <oneapi/dpl/numeric>
|
|
||||||
|
|
||||||
#ifdef ONEDPL_USE_DPCPP_BACKEND
|
|
||||||
#include <CL/sycl.hpp>
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#include <algorithm>
|
|
||||||
#include <execution>
|
|
||||||
#include <numeric>
|
|
||||||
|
|
||||||
#if defined(ONEDPL_USE_DPCPP_BACKEND) || \
|
|
||||||
defined(ONEDPL_USE_TBB_BACKEND) || \
|
|
||||||
defined(ONEDPL_USE_OPENMP_BACKEND)
|
|
||||||
#error oneDPL missing (ONEDPL_VERSION_MAJOR not defined) but backend (ONEDPL_USE_*_BACKEND) specified
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// A lightweight counting iterator which will be used by the STL algorithms
|
// A lightweight counting iterator which will be used by the STL algorithms
|
||||||
// NB: C++ <= 17 doesn't have this built-in, and it's only added later in ranges-v3 (C++2a) which this
|
// NB: C++ <= 17 doesn't have this built-in, and it's only added later in ranges-v3 (C++2a) which this
|
||||||
// implementation doesn't target
|
// implementation doesn't target
|
||||||
template <typename N>
|
template <typename N>
|
||||||
class ranged_iterator {
|
class ranged {
|
||||||
N num;
|
N from, to;
|
||||||
public:
|
public:
|
||||||
|
ranged(N from, N to ): from(from), to(to) {}
|
||||||
|
class iterator {
|
||||||
|
N num;
|
||||||
|
public:
|
||||||
using difference_type = N;
|
using difference_type = N;
|
||||||
using value_type = N;
|
using value_type = N;
|
||||||
using pointer = const N*;
|
using pointer = const N*;
|
||||||
using reference = const N&;
|
using reference = const N&;
|
||||||
using iterator_category = std::random_access_iterator_tag;
|
using iterator_category = std::random_access_iterator_tag;
|
||||||
explicit ranged_iterator(N _num = 0) : num(_num) {}
|
explicit iterator(N _num = 0) : num(_num) {}
|
||||||
|
|
||||||
ranged_iterator<N>& operator++() { num++; return *this; }
|
iterator& operator++() { num++; return *this; }
|
||||||
ranged_iterator<N> operator++(int) { ranged_iterator<N> retval = *this; ++(*this); return retval; }
|
iterator operator++(int) { iterator retval = *this; ++(*this); return retval; }
|
||||||
ranged_iterator<N> operator+(const value_type v) const { return ranged_iterator<N>(num + v); }
|
iterator operator+(const value_type v) const { return iterator(num + v); }
|
||||||
|
|
||||||
bool operator==(ranged_iterator<N> other) const { return num == other.num; }
|
bool operator==(iterator other) const { return num == other.num; }
|
||||||
bool operator!=(ranged_iterator<N> other) const { return *this != other; }
|
bool operator!=(iterator other) const { return *this != other; }
|
||||||
bool operator<(ranged_iterator<N> other) const { return num < other.num; }
|
bool operator<(iterator other) const { return num < other.num; }
|
||||||
|
|
||||||
reference operator*() const { return num;}
|
reference operator*() const { return num;}
|
||||||
difference_type operator-(const ranged_iterator<N> &it) const { return num - it.num; }
|
difference_type operator-(const iterator &it) const { return num - it.num; }
|
||||||
value_type operator[](const difference_type &i) const { return num + i; }
|
value_type operator[](const difference_type &i) const { return num + i; }
|
||||||
|
|
||||||
|
};
|
||||||
|
iterator begin() { return iterator(from); }
|
||||||
|
iterator end() { return iterator(to >= from? to+1 : to-1); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -71,37 +55,17 @@ class STDIndicesStream : public Stream<T>
|
|||||||
// Size of arrays
|
// Size of arrays
|
||||||
int array_size;
|
int array_size;
|
||||||
|
|
||||||
#if defined(ONEDPL_USE_DPCPP_BACKEND)
|
// induction range
|
||||||
// SYCL oneDPL backend
|
ranged<int> range;
|
||||||
using ExecutionPolicy = oneapi::dpl::execution::device_policy<>;
|
|
||||||
using Allocator = sycl::usm_allocator<T, sycl::usm::alloc::shared>;
|
|
||||||
using IteratorType = oneapi::dpl::counting_iterator<int>;
|
|
||||||
#elif defined(USE_ONEDPL)
|
|
||||||
// every other non-SYCL oneDPL backend (i.e TBB, OMP)
|
|
||||||
using ExecutionPolicy = decltype(oneapi::dpl::execution::par_unseq);
|
|
||||||
using Allocator = std::allocator<T>;
|
|
||||||
using IteratorType = oneapi::dpl::counting_iterator<int>;
|
|
||||||
#else
|
|
||||||
// normal std execution policies
|
|
||||||
using ExecutionPolicy = decltype(std::execution::par_unseq);
|
|
||||||
using Allocator = std::allocator<T>;
|
|
||||||
using IteratorType = ranged_iterator<int>;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
IteratorType range_start;
|
|
||||||
IteratorType range_end;
|
|
||||||
|
|
||||||
ExecutionPolicy exe_policy{};
|
|
||||||
Allocator allocator;
|
|
||||||
|
|
||||||
// Device side pointers
|
// Device side pointers
|
||||||
std::vector<T, Allocator> a;
|
std::vector<T> a;
|
||||||
std::vector<T, Allocator> b;
|
std::vector<T> b;
|
||||||
std::vector<T, Allocator> c;
|
std::vector<T> c;
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
STDIndicesStream(const int, int);
|
STDIndicesStream(const int, int) noexcept;
|
||||||
~STDIndicesStream() = default;
|
~STDIndicesStream() = default;
|
||||||
|
|
||||||
virtual void copy() override;
|
virtual void copy() override;
|
||||||
|
|||||||
@ -19,30 +19,9 @@ register_flag_optional(NVHPC_OFFLOAD
|
|||||||
ccall - Compile for all supported compute capabilities"
|
ccall - Compile for all supported compute capabilities"
|
||||||
"")
|
"")
|
||||||
|
|
||||||
register_flag_optional(ONEDPL_OFFLOAD
|
|
||||||
"Use the DPC++ oneDPL library which supports STL algorithms on SYCL, TBB, and OpenMP.
|
|
||||||
This option only supports the oneDPL library shipped with oneAPI, and must use the dpcpp
|
|
||||||
compiler (i.e -DCMAKE_CXX_COMPILER=dpcpp) for this option.
|
|
||||||
Make sure your oneAPI installation includes at least the following components: dpcpp, onedpl, onetbb.
|
|
||||||
The env. variable `TBBROOT` needs to point to the base directory of your TBB install (e.g /opt/intel/oneapi/tbb/latest/).
|
|
||||||
This should be done by oneAPI's `setvars.sh` script automatically.
|
|
||||||
|
|
||||||
Possible values are:
|
|
||||||
TBB - Use the TBB backend, the correct TBB library will be linked from oneAPI automatically.
|
|
||||||
OMP - Use the OpenMP backend
|
|
||||||
DPCPP - Use the SYCL (via dpcpp) backend with the default selector.
|
|
||||||
See https://intel.github.io/llvm-docs/EnvironmentVariables.html#sycl-device-filter
|
|
||||||
on selecting a non-default device or SYCL backend."
|
|
||||||
"")
|
|
||||||
|
|
||||||
|
|
||||||
macro(setup)
|
macro(setup)
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
if (NVHPC_OFFLOAD AND ONEDPL_OFFLOAD)
|
|
||||||
message(FATAL_ERROR "NVHPC_OFFLOAD and NVHPC_OFFLOAD are mutually exclusive")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (NVHPC_OFFLOAD)
|
if (NVHPC_OFFLOAD)
|
||||||
set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
|
set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
|
||||||
# propagate flags to linker so that it links with the gpu stuff as well
|
# propagate flags to linker so that it links with the gpu stuff as well
|
||||||
@ -51,33 +30,4 @@ macro(setup)
|
|||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
||||||
if (ONEDPL_OFFLOAD)
|
|
||||||
set(CXX_EXTRA_FLAGS)
|
|
||||||
set(CXX_EXTRA_LIBRARIES /opt/intel/oneapi/tbb/2021.4.0/lib/intel64/gcc4.8/libtbb.so)
|
|
||||||
# propagate flags to linker so that it links with the gpu stuff as well
|
|
||||||
register_append_cxx_flags(ANY -fopenmp -fsycl-unnamed-lambda -fsycl)
|
|
||||||
|
|
||||||
# XXX see https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-library-guide/top/oneapi-dpc-library-onedpl-overview.html
|
|
||||||
# this is to avoid the system TBB headers (if exists) from having precedence which isn't compatible with oneDPL's par implementation
|
|
||||||
register_definitions(
|
|
||||||
PSTL_USE_PARALLEL_POLICIES=0
|
|
||||||
_GLIBCXX_USE_TBB_PAR_BACKEND=0
|
|
||||||
)
|
|
||||||
|
|
||||||
register_definitions(USE_ONEDPL)
|
|
||||||
if (ONEDPL_OFFLOAD STREQUAL "TBB")
|
|
||||||
register_definitions(ONEDPL_USE_TBB_BACKEND=1)
|
|
||||||
elseif (ONEDPL_OFFLOAD STREQUAL "OPENMP")
|
|
||||||
register_definitions(ONEDPL_USE_OPENMP_BACKEND=1)
|
|
||||||
elseif (ONEDPL_OFFLOAD STREQUAL "SYCL")
|
|
||||||
register_definitions(ONEDPL_USE_DPCPP_BACKEND=1)
|
|
||||||
else ()
|
|
||||||
message(FATAL_ERROR "Unsupported ONEDPL_OFFLOAD backend: ${ONEDPL_OFFLOAD}")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
# even with the workaround above, -ltbb may still end up with the wrong one, so be explicit here
|
|
||||||
register_link_library($ENV{TBBROOT}/lib/intel64/gcc4.8/libtbb.so)
|
|
||||||
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user