Add options for std::vector or raw pointers for TBB/STD
This commit is contained in:
parent
240962722f
commit
5f6e714bdd
@ -10,60 +10,79 @@
|
||||
#include <execution>
|
||||
#include <numeric>
|
||||
|
||||
#ifndef ALIGNMENT
|
||||
#define ALIGNMENT (2*1024*1024) // 2MB
|
||||
#endif
|
||||
|
||||
#ifdef USE_VECTOR
|
||||
#define BEGIN(x) (x).begin()
|
||||
#define END(x) (x).end()
|
||||
#else
|
||||
#define BEGIN(x) (x)
|
||||
#define END(x) ((x) + array_size)
|
||||
#endif
|
||||
|
||||
// There are three execution policies:
|
||||
// auto exe_policy = std::execution::seq;
|
||||
// auto exe_policy = std::execution::par;
|
||||
auto exe_policy = std::execution::par_unseq;
|
||||
constexpr auto exe_policy = std::execution::par_unseq;
|
||||
|
||||
|
||||
template <class T>
|
||||
STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device)
|
||||
noexcept : array_size{ARRAY_SIZE}, a(array_size), b(array_size), c(array_size)
|
||||
{
|
||||
}
|
||||
noexcept : array_size{ARRAY_SIZE},
|
||||
#ifdef USE_VECTOR
|
||||
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
|
||||
#else
|
||||
array_size(ARRAY_SIZE),
|
||||
a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
||||
b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
||||
c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE))
|
||||
#endif
|
||||
{ std::cout <<"Backing storage typeid: " << typeid(a).name() << std::endl; }
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::init_arrays(T initA, T initB, T initC)
|
||||
{
|
||||
std::fill(exe_policy, a.begin(), a.end(), initA);
|
||||
std::fill(exe_policy, b.begin(), b.end(), initB);
|
||||
std::fill(exe_policy, c.begin(), c.end(), initC);
|
||||
std::fill(exe_policy, BEGIN(a), END(a), initA);
|
||||
std::fill(exe_policy, BEGIN(b), END(b), initB);
|
||||
std::fill(exe_policy, BEGIN(c), END(c), initC);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||
{
|
||||
h_a = a;
|
||||
h_b = b;
|
||||
h_c = c;
|
||||
std::copy(BEGIN(a), END(a), h_a.begin());
|
||||
std::copy(BEGIN(b), END(b), h_b.begin());
|
||||
std::copy(BEGIN(c), END(c), h_c.begin());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::copy()
|
||||
{
|
||||
// c[i] = a[i]
|
||||
std::copy(exe_policy, a.begin(), a.end(), c.begin());
|
||||
std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::mul()
|
||||
{
|
||||
// b[i] = scalar * c[i];
|
||||
std::transform(exe_policy, c.begin(), c.end(), b.begin(), [scalar = startScalar](T ci){ return scalar*ci; });
|
||||
std::transform(exe_policy, BEGIN(c), END(c), BEGIN(b), [scalar = startScalar](T ci){ return scalar*ci; });
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::add()
|
||||
{
|
||||
// c[i] = a[i] + b[i];
|
||||
std::transform(exe_policy, a.begin(), a.end(), b.begin(), c.begin(), std::plus<T>());
|
||||
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(c), std::plus<T>());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::triad()
|
||||
{
|
||||
// a[i] = b[i] + scalar * c[i];
|
||||
std::transform(exe_policy, b.begin(), b.end(), c.begin(), a.begin(), [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; });
|
||||
std::transform(exe_policy, BEGIN(b), END(b), BEGIN(c), BEGIN(a), [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; });
|
||||
}
|
||||
|
||||
template <class T>
|
||||
@ -73,8 +92,8 @@ void STDDataStream<T>::nstream()
|
||||
// Need to do in two stages with C++11 STL.
|
||||
// 1: a[i] += b[i]
|
||||
// 2: a[i] += scalar * c[i];
|
||||
std::transform(exe_policy, a.begin(), a.end(), b.begin(), a.begin(), [](T ai, T bi){ return ai + bi; });
|
||||
std::transform(exe_policy, a.begin(), a.end(), c.begin(), a.begin(), [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; });
|
||||
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(a), [](T ai, T bi){ return ai + bi; });
|
||||
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(c), BEGIN(a), [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; });
|
||||
}
|
||||
|
||||
|
||||
@ -82,7 +101,7 @@ template <class T>
|
||||
/// Dot kernel: returns the sum over i of a[i] * b[i].
///
/// The reduction starts from the double literal 0.0; the result is converted
/// to T by the return.
T STDDataStream<T>::dot()
{
  const auto first_a = BEGIN(a);
  const auto last_a = END(a);
  return std::transform_reduce(exe_policy, first_a, last_a, BEGIN(b), 0.0);
}
|
||||
|
||||
void listDevices(void)
|
||||
@ -102,3 +121,5 @@ std::string getDeviceDriver(const int)
|
||||
template class STDDataStream<float>;
|
||||
template class STDDataStream<double>;
|
||||
|
||||
#undef BEGIN
|
||||
#undef END
|
||||
@ -21,9 +21,11 @@ class STDDataStream : public Stream<T>
|
||||
int array_size;
|
||||
|
||||
// Device side pointers
|
||||
std::vector<T> a;
|
||||
std::vector<T> b;
|
||||
std::vector<T> c;
|
||||
#ifdef USE_VECTOR
|
||||
std::vector<T> a, b, c;
|
||||
#else
|
||||
T *a, *b, *c;
|
||||
#endif
|
||||
|
||||
|
||||
public:
|
||||
|
||||
@ -3,6 +3,10 @@ register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection"
|
||||
"c++")
|
||||
|
||||
register_flag_optional(USE_VECTOR
|
||||
"Whether to use std::vector<T> for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use."
|
||||
"OFF")
|
||||
|
||||
register_flag_optional(NVHPC_OFFLOAD
|
||||
"Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK.
|
||||
The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`)
|
||||
@ -28,6 +32,8 @@ macro(setup)
|
||||
register_append_cxx_flags(ANY ${NVHPC_FLAGS})
|
||||
register_append_link_flags(${NVHPC_FLAGS})
|
||||
endif ()
|
||||
|
||||
if(USE_VECTOR)
|
||||
register_definitions(USE_VECTOR)
|
||||
endif()
|
||||
|
||||
endmacro()
|
||||
|
||||
@ -10,46 +10,63 @@
|
||||
#include <execution>
|
||||
#include <numeric>
|
||||
|
||||
#ifndef ALIGNMENT
|
||||
#define ALIGNMENT (2*1024*1024) // 2MB
|
||||
#endif
|
||||
|
||||
#ifdef USE_VECTOR
|
||||
#define BEGIN(x) (x).begin()
|
||||
#define END(x) (x).end()
|
||||
#else
|
||||
#define BEGIN(x) (x)
|
||||
#define END(x) ((x) + array_size)
|
||||
#endif
|
||||
|
||||
// There are three execution policies:
|
||||
// auto exe_policy = std::execution::seq;
|
||||
// auto exe_policy = std::execution::par;
|
||||
auto exe_policy = std::execution::par_unseq;
|
||||
|
||||
constexpr auto exe_policy = std::execution::par_unseq;
|
||||
|
||||
template <class T>
|
||||
STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device)
|
||||
noexcept : array_size{ARRAY_SIZE}, range(0, array_size), a(array_size), b(array_size), c(array_size)
|
||||
{
|
||||
}
|
||||
noexcept : array_size{ARRAY_SIZE}, range(0, array_size),
|
||||
#ifdef USE_VECTOR
|
||||
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
|
||||
#else
|
||||
a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
||||
b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
||||
c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE))
|
||||
#endif
|
||||
{ std::cout <<"Backing storage typeid: " << typeid(a).name() << std::endl; }
|
||||
|
||||
template <class T>
|
||||
void STDIndicesStream<T>::init_arrays(T initA, T initB, T initC)
|
||||
{
|
||||
std::fill(exe_policy, a.begin(), a.end(), initA);
|
||||
std::fill(exe_policy, b.begin(), b.end(), initB);
|
||||
std::fill(exe_policy, c.begin(), c.end(), initC);
|
||||
std::fill(exe_policy, BEGIN(a), END(a), initA);
|
||||
std::fill(exe_policy, BEGIN(b), END(b), initB);
|
||||
std::fill(exe_policy, BEGIN(c), END(c), initC);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDIndicesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||
{
|
||||
h_a = a;
|
||||
h_b = b;
|
||||
h_c = c;
|
||||
std::copy(BEGIN(a), END(a), h_a.begin());
|
||||
std::copy(BEGIN(b), END(b), h_b.begin());
|
||||
std::copy(BEGIN(c), END(c), h_c.begin());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDIndicesStream<T>::copy()
|
||||
{
|
||||
// c[i] = a[i]
|
||||
std::copy(exe_policy, a.begin(), a.end(), c.begin());
|
||||
std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDIndicesStream<T>::mul()
|
||||
{
|
||||
// b[i] = scalar * c[i];
|
||||
std::transform(exe_policy, range.begin(), range.end(), b.begin(), [&, scalar = startScalar](int i) {
|
||||
std::transform(exe_policy, range.begin(), range.end(), BEGIN(b), [&, scalar = startScalar](int i) {
|
||||
return scalar * c[i];
|
||||
});
|
||||
}
|
||||
@ -58,7 +75,7 @@ template <class T>
|
||||
/// Add kernel: c[i] = a[i] + b[i], driven by the index range and written
/// through the output iterator of c.
void STDIndicesStream<T>::add()
{
  // Maps an index to the element-wise sum of a and b.
  const auto sum_at = [&](int i) { return a[i] + b[i]; };
  std::transform(exe_policy, range.begin(), range.end(), BEGIN(c), sum_at);
}
|
||||
@ -67,7 +84,7 @@ template <class T>
|
||||
/// Triad kernel: a[i] = b[i] + scalar * c[i], driven by the index range and
/// written through the output iterator of a.
void STDIndicesStream<T>::triad()
{
  // Maps an index to b[i] plus the scaled element of c.
  const auto fused_at = [&, scalar = startScalar](int i) { return b[i] + scalar * c[i]; };
  std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), fused_at);
}
|
||||
@ -79,7 +96,7 @@ void STDIndicesStream<T>::nstream()
|
||||
// Need to do in two stages with C++11 STL.
|
||||
// 1: a[i] += b[i]
|
||||
// 2: a[i] += scalar * c[i];
|
||||
std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) {
|
||||
std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [&, scalar = startScalar](int i) {
|
||||
return a[i] + b[i] + scalar * c[i];
|
||||
});
|
||||
}
|
||||
@ -89,7 +106,7 @@ template <class T>
|
||||
/// Dot kernel: returns the sum over i of a[i] * b[i].
///
/// The reduction starts from the double literal 0.0; the result is converted
/// to T by the return.
T STDIndicesStream<T>::dot()
{
  const auto first_a = BEGIN(a);
  const auto last_a = END(a);
  return std::transform_reduce(exe_policy, first_a, last_a, BEGIN(b), 0.0);
}
|
||||
|
||||
void listDevices(void)
|
||||
@ -109,3 +126,5 @@ std::string getDeviceDriver(const int)
|
||||
template class STDIndicesStream<float>;
|
||||
template class STDIndicesStream<double>;
|
||||
|
||||
#undef BEGIN
|
||||
#undef END
|
||||
@ -10,6 +10,11 @@
|
||||
#include <stdexcept>
|
||||
#include "Stream.h"
|
||||
|
||||
#ifdef USE_SPAN
|
||||
#include <span>
|
||||
#endif
|
||||
|
||||
|
||||
#define IMPLEMENTATION_STRING "STD (index-oriented)"
|
||||
|
||||
|
||||
@ -60,9 +65,11 @@ class STDIndicesStream : public Stream<T>
|
||||
ranged<int> range;
|
||||
|
||||
// Device side pointers
|
||||
std::vector<T> a;
|
||||
std::vector<T> b;
|
||||
std::vector<T> c;
|
||||
#ifdef USE_VECTOR
|
||||
std::vector<T> a, b, c;
|
||||
#else
|
||||
T *a, *b, *c;
|
||||
#endif
|
||||
|
||||
|
||||
public:
|
||||
|
||||
@ -3,6 +3,10 @@ register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection"
|
||||
"c++")
|
||||
|
||||
register_flag_optional(USE_VECTOR
|
||||
"Whether to use std::vector<T> for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use."
|
||||
"OFF")
|
||||
|
||||
register_flag_optional(NVHPC_OFFLOAD
|
||||
"Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK.
|
||||
The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`)
|
||||
@ -28,6 +32,8 @@ macro(setup)
|
||||
register_append_cxx_flags(ANY ${NVHPC_FLAGS})
|
||||
register_append_link_flags(${NVHPC_FLAGS})
|
||||
endif ()
|
||||
|
||||
if(USE_VECTOR)
|
||||
register_definitions(USE_VECTOR)
|
||||
endif()
|
||||
|
||||
endmacro()
|
||||
|
||||
@ -10,20 +10,40 @@
|
||||
#include <execution>
|
||||
#include <ranges>
|
||||
|
||||
#ifndef ALIGNMENT
|
||||
#define ALIGNMENT (2*1024*1024) // 2MB
|
||||
#endif
|
||||
|
||||
#ifdef USE_VECTOR
|
||||
#define BEGIN(x) (x).begin()
|
||||
#define END(x) (x).end()
|
||||
#else
|
||||
#define BEGIN(x) (x)
|
||||
#define END(x) ((x) + array_size)
|
||||
#endif
|
||||
|
||||
// There are three execution policies:
|
||||
// auto exe_policy = std::execution::seq;
|
||||
// auto exe_policy = std::execution::par;
|
||||
constexpr auto exe_policy = std::execution::par_unseq;
|
||||
|
||||
template <class T>
|
||||
STDRangesStream<T>::STDRangesStream(const int ARRAY_SIZE, int device)
|
||||
: array_size{ARRAY_SIZE}
|
||||
{
|
||||
a = std::vector<T>(array_size);
|
||||
b = std::vector<T>(array_size);
|
||||
c = std::vector<T>(array_size);
|
||||
}
|
||||
: array_size{ARRAY_SIZE},
|
||||
#ifdef USE_VECTOR
|
||||
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
|
||||
#else
|
||||
a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
||||
b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
||||
c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE))
|
||||
#endif
|
||||
{ std::cout <<"Backing storage typeid: " << typeid(a).name() << std::endl; }
|
||||
|
||||
template <class T>
|
||||
void STDRangesStream<T>::init_arrays(T initA, T initB, T initC)
|
||||
{
|
||||
std::for_each_n(
|
||||
std::execution::par_unseq,
|
||||
exe_policy,
|
||||
std::views::iota(0).begin(), array_size, // loop range
|
||||
[&] (int i) {
|
||||
a[i] = initA;
|
||||
@ -37,16 +57,16 @@ template <class T>
|
||||
/// Copy the contents of a, b and c into the caller-supplied vectors.
///
/// Each destination is replaced by the full source range, so it ends up with
/// the same number of elements as the source regardless of its size on entry.
///
/// @param h_a receives the elements of a
/// @param h_b receives the elements of b
/// @param h_c receives the elements of c
void STDRangesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
{
  // Element-wise copy. assign() sizes the destination to the source range;
  // std::copy into h_x.begin() would write past the end of a destination
  // that is too short.
  h_a.assign(BEGIN(a), END(a));
  h_b.assign(BEGIN(b), END(b));
  h_c.assign(BEGIN(c), END(c));
}
|
||||
|
||||
template <class T>
|
||||
void STDRangesStream<T>::copy()
|
||||
{
|
||||
std::for_each_n(
|
||||
std::execution::par_unseq,
|
||||
exe_policy,
|
||||
std::views::iota(0).begin(), array_size,
|
||||
[&] (int i) {
|
||||
c[i] = a[i];
|
||||
@ -60,7 +80,7 @@ void STDRangesStream<T>::mul()
|
||||
const T scalar = startScalar;
|
||||
|
||||
std::for_each_n(
|
||||
std::execution::par_unseq,
|
||||
exe_policy,
|
||||
std::views::iota(0).begin(), array_size,
|
||||
[&] (int i) {
|
||||
b[i] = scalar * c[i];
|
||||
@ -72,7 +92,7 @@ template <class T>
|
||||
void STDRangesStream<T>::add()
|
||||
{
|
||||
std::for_each_n(
|
||||
std::execution::par_unseq,
|
||||
exe_policy,
|
||||
std::views::iota(0).begin(), array_size,
|
||||
[&] (int i) {
|
||||
c[i] = a[i] + b[i];
|
||||
@ -86,7 +106,7 @@ void STDRangesStream<T>::triad()
|
||||
const T scalar = startScalar;
|
||||
|
||||
std::for_each_n(
|
||||
std::execution::par_unseq,
|
||||
exe_policy,
|
||||
std::views::iota(0).begin(), array_size,
|
||||
[&] (int i) {
|
||||
a[i] = b[i] + scalar * c[i];
|
||||
@ -100,7 +120,7 @@ void STDRangesStream<T>::nstream()
|
||||
const T scalar = startScalar;
|
||||
|
||||
std::for_each_n(
|
||||
std::execution::par_unseq,
|
||||
exe_policy,
|
||||
std::views::iota(0).begin(), array_size,
|
||||
[&] (int i) {
|
||||
a[i] += b[i] + scalar * c[i];
|
||||
@ -114,8 +134,8 @@ T STDRangesStream<T>::dot()
|
||||
// sum += a[i] * b[i];
|
||||
return
|
||||
std::transform_reduce(
|
||||
std::execution::par_unseq,
|
||||
a.begin(), a.end(), b.begin(), 0.0);
|
||||
exe_policy,
|
||||
BEGIN(a), END(a), BEGIN(b), 0.0);
|
||||
}
|
||||
|
||||
void listDevices(void)
|
||||
@ -136,3 +156,5 @@ std::string getDeviceDriver(const int)
|
||||
template class STDRangesStream<float>;
|
||||
template class STDRangesStream<double>;
|
||||
|
||||
#undef BEGIN
|
||||
#undef END
|
||||
@ -21,9 +21,11 @@ class STDRangesStream : public Stream<T>
|
||||
int array_size;
|
||||
|
||||
// Device side pointers
|
||||
std::vector<T> a;
|
||||
std::vector<T> b;
|
||||
std::vector<T> c;
|
||||
#ifdef USE_VECTOR
|
||||
std::vector<T> a, b, c;
|
||||
#else
|
||||
T *a, *b, *c;
|
||||
#endif
|
||||
|
||||
public:
|
||||
STDRangesStream(const int, int);
|
||||
|
||||
@ -3,6 +3,10 @@ register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection and supports C++20 Ranges"
|
||||
"c++")
|
||||
|
||||
register_flag_optional(USE_VECTOR
|
||||
"Whether to use std::vector<T> for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use."
|
||||
"OFF")
|
||||
|
||||
macro(setup)
|
||||
|
||||
# TODO this needs to eventually be removed when CMake adds proper C++20 support or at least update the flag used here
|
||||
@ -13,4 +17,7 @@ macro(setup)
|
||||
unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default
|
||||
# and append our own:
|
||||
register_append_cxx_flags(ANY -std=c++2a)
|
||||
if(USE_VECTOR)
|
||||
register_definitions(USE_VECTOR)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
@ -5,15 +5,37 @@
|
||||
// source code
|
||||
|
||||
#include "TBBStream.hpp"
|
||||
#include <cstdlib>
|
||||
|
||||
#ifndef ALIGNMENT
|
||||
#define ALIGNMENT (2*1024*1024) // 2MB
|
||||
#endif
|
||||
|
||||
#ifdef USE_VECTOR
|
||||
#define BEGIN(x) (x).begin()
|
||||
#define END(x) (x).end()
|
||||
#else
|
||||
#define BEGIN(x) (x)
|
||||
#define END(x) ((x) + array_size)
|
||||
#endif
|
||||
|
||||
template <class T>
|
||||
TBBStream<T>::TBBStream(const int ARRAY_SIZE, int device)
|
||||
: partitioner(), range(0, ARRAY_SIZE), a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
|
||||
: partitioner(), range(0, ARRAY_SIZE),
|
||||
#ifdef USE_VECTOR
|
||||
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
|
||||
#else
|
||||
array_size(ARRAY_SIZE),
|
||||
a((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
||||
b((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE)),
|
||||
c((T *) aligned_alloc(ALIGNMENT, sizeof(T) * ARRAY_SIZE))
|
||||
#endif
|
||||
{
|
||||
if(device != 0){
|
||||
throw std::runtime_error("Device != 0 is not supported by TBB");
|
||||
}
|
||||
std::cout << "Using TBB partitioner: " PARTITIONER_NAME << std::endl;
|
||||
std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl;
|
||||
}
|
||||
|
||||
|
||||
@ -35,9 +57,9 @@ template <class T>
|
||||
/// Copy the contents of a, b and c into the caller-supplied vectors.
///
/// Each destination is replaced by the full source range, so it ends up with
/// the same number of elements as the source regardless of its size on entry.
///
/// @param h_a receives the elements of a
/// @param h_b receives the elements of b
/// @param h_c receives the elements of c
void TBBStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
{
  // Element-wise copy. assign() sizes the destination to the source range;
  // std::copy into h_x.begin() would write past the end of a destination
  // that is too short.
  h_a.assign(BEGIN(a), END(a));
  h_b.assign(BEGIN(b), END(b));
  h_c.assign(BEGIN(c), END(c));
}
|
||||
|
||||
template <class T>
|
||||
@ -132,3 +154,5 @@ std::string getDeviceDriver(const int)
|
||||
template class TBBStream<float>;
|
||||
template class TBBStream<double>;
|
||||
|
||||
#undef BEGIN
|
||||
#undef END
|
||||
@ -40,10 +40,15 @@ class TBBStream : public Stream<T>
|
||||
tbb_partitioner partitioner;
|
||||
tbb::blocked_range<size_t> range;
|
||||
// Device side pointers
|
||||
std::vector<T> a;
|
||||
std::vector<T> b;
|
||||
std::vector<T> c;
|
||||
|
||||
#ifdef USE_VECTOR
|
||||
std::vector<T> a, b, c;
|
||||
#else
|
||||
size_t array_size;
|
||||
T *a, *b, *c;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
public:
|
||||
TBBStream(const int, int);
|
||||
~TBBStream() = default;
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
|
||||
register_flag_optional(ONE_TBB_DIR
|
||||
"Absolute path to oneTBB (with header `onetbb/tbb.h`) distribution, the directory should contain at least `include/` and `lib/.
|
||||
If unspecified, the system TBB (with header `tbb/tbb.h`) will be used via CMake's find_package(TBB)."
|
||||
If unspecified, the system TBB (with header `tbb/tbb.h`) will be used via CMake's find_package(TBB)."
|
||||
"")
|
||||
|
||||
|
||||
@ -15,15 +15,22 @@ register_flag_optional(PARTITIONER
|
||||
See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners for more details."
|
||||
"AUTO")
|
||||
|
||||
register_flag_optional(USE_VECTOR
|
||||
"Whether to use std::vector<T> for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use."
|
||||
"OFF")
|
||||
|
||||
macro(setup)
|
||||
if(ONE_TBB_DIR)
|
||||
set(TBB_ROOT "${ONE_TBB_DIR}") # see https://github.com/Kitware/VTK/blob/0a31a9a3c1531ae238ac96a372fec4be42282863/CMake/FindTBB.cmake#L34
|
||||
# docs on Intel's website refers to TBB_DIR which is not correct
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
# see https://github.com/oneapi-src/oneTBB/blob/master/cmake/README.md#tbbconfig---integration-of-binary-packages
|
||||
find_package(TBB REQUIRED)
|
||||
register_link_library(TBB::tbb)
|
||||
register_definitions(PARTITIONER_${PARTITIONER})
|
||||
if(USE_VECTOR)
|
||||
register_definitions(USE_VECTOR)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user