Split implementation for index/data oriented std variants
Fix missing range iterator functions for std-indices. Rename std20 to std-ranges.
This commit is contained in:
parent
881c0cc706
commit
f9bba3c0de
@ -34,9 +34,9 @@ endmacro()
|
||||
#set(CUDA_ARCH sm_70)
|
||||
#set(BLT_DIR /home/tom/Downloads/blt-0.3.6/)
|
||||
|
||||
#set(MODEL STD)
|
||||
#set(ARCH cc70)
|
||||
#set(CXX_EXTRA_FLAGS -v)
|
||||
#set(MODEL std-data)
|
||||
#set(CMAKE_CXX_COMPILER /home/tom/Downloads/nvhpc_2021_219_Linux_x86_64_cuda_multi/install_components/Linux_x86_64/21.9/compilers/bin/nvc++)
|
||||
#set(NVHPC_OFFLOAD "cuda11.4,cc61")
|
||||
|
||||
#set(MODEL CUDA)
|
||||
#set(ARCH sm_70)
|
||||
@ -117,8 +117,9 @@ include(cmake/register_models.cmake)
|
||||
# register our models <model_name> <preprocessor_def_name> <source files...>
|
||||
register_model(omp OMP OMPStream.cpp)
|
||||
register_model(ocl OCL OCLStream.cpp)
|
||||
register_model(std STD STDStream.cpp)
|
||||
register_model(std20 STD20 STD20Stream.cpp)
|
||||
register_model(std-data STD_DATA STDDataStream.cpp)
|
||||
register_model(std-indices STD_INDICES STDIndicesStream.cpp)
|
||||
register_model(std-ranges STD_RANGES STDRangesStream.cpp)
|
||||
register_model(hip HIP HIPStream.cpp)
|
||||
register_model(cuda CUDA CUDAStream.cu)
|
||||
register_model(kokkos KOKKOS KokkosStream.cpp)
|
||||
|
||||
@ -115,7 +115,7 @@ run_build() {
|
||||
# GCC_STD_PAR_LIB="tbb"
|
||||
# CLANG_STD_PAR_LIB="tbb"
|
||||
# GCC_OMP_OFFLOAD_AMD=false
|
||||
# GCC_OMP_OFFLOAD_NVIDIA=true
|
||||
# GCC_OMP_OFFLOAD_NVIDIA=false
|
||||
# CLANG_OMP_OFFLOAD_AMD=false
|
||||
# CLANG_OMP_OFFLOAD_NVIDIA=false
|
||||
###
|
||||
@ -136,8 +136,9 @@ build_gcc() {
|
||||
fi
|
||||
|
||||
# some distributions like Ubuntu bionic implement std par with TBB, so conditionally link it here
|
||||
run_build $name "${GCC_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
||||
run_build $name "${GCC_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
||||
run_build $name "${GCC_CXX:?}" std-data "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
||||
run_build $name "${GCC_CXX:?}" std-indices "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
||||
run_build $name "${GCC_CXX:?}" std-ranges "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
||||
|
||||
run_build $name "${GCC_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
|
||||
run_build $name "${GCC_CXX:?}" tbb "$cxx" # build TBB again with the system TBB
|
||||
@ -211,7 +212,8 @@ build_clang() {
|
||||
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
|
||||
run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
||||
run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||
run_build $name "${CLANG_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
||||
run_build $name "${CLANG_CXX:?}" std-data "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
||||
run_build $name "${CLANG_CXX:?}" std-indices "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
||||
# run_build $name "${CLANG_CXX:?}" std-ranges "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported
|
||||
run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
||||
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
|
||||
@ -219,7 +221,8 @@ build_clang() {
|
||||
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
|
||||
run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
||||
run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||
run_build $name "${CLANG_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
||||
run_build $name "${CLANG_CXX:?}" std-data "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
||||
run_build $name "${CLANG_CXX:?}" std-indices "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
||||
# run_build $name "${CLANG_CXX:?}" std-ranges "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported
|
||||
|
||||
run_build $name "${CLANG_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
|
||||
@ -232,7 +235,8 @@ build_clang() {
|
||||
build_nvhpc() {
|
||||
local name="nvhpc_build"
|
||||
local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}"
|
||||
run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
|
||||
run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
|
||||
run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
|
||||
run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY"
|
||||
run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen"
|
||||
}
|
||||
|
||||
26
src/main.cpp
26
src/main.cpp
@ -21,10 +21,12 @@
|
||||
|
||||
#if defined(CUDA)
|
||||
#include "CUDAStream.h"
|
||||
#elif defined(STD)
|
||||
#include "STDStream.h"
|
||||
#elif defined(STD20)
|
||||
#include "STD20Stream.hpp"
|
||||
#elif defined(STD_DATA)
|
||||
#include "STDDataStream.h"
|
||||
#elif defined(STD_INDICES)
|
||||
#include "STDIndicesStream.h"
|
||||
#elif defined(STD_RANGES)
|
||||
#include "STDRangesStream.hpp"
|
||||
#elif defined(TBB)
|
||||
#include "TBBStream.hpp"
|
||||
#elif defined(THRUST)
|
||||
@ -264,13 +266,17 @@ void run()
|
||||
// Use the Kokkos implementation
|
||||
stream = new KokkosStream<T>(ARRAY_SIZE, deviceIndex);
|
||||
|
||||
#elif defined(STD)
|
||||
// Use the STD implementation
|
||||
stream = new STDStream<T>(ARRAY_SIZE, deviceIndex);
|
||||
#elif defined(STD_DATA)
|
||||
// Use the C++ STD data-oriented implementation
|
||||
stream = new STDDataStream<T>(ARRAY_SIZE, deviceIndex);
|
||||
|
||||
#elif defined(STD20)
|
||||
// Use the C++20 implementation
|
||||
stream = new STD20Stream<T>(ARRAY_SIZE, deviceIndex);
|
||||
#elif defined(STD_INDICES)
|
||||
// Use the C++ STD index-oriented implementation
|
||||
stream = new STDIndicesStream<T>(ARRAY_SIZE, deviceIndex);
|
||||
|
||||
#elif defined(STD_RANGES)
|
||||
// Use the C++ STD ranges implementation
|
||||
stream = new STDRangesStream<T>(ARRAY_SIZE, deviceIndex);
|
||||
|
||||
#elif defined(TBB)
|
||||
// Use the TBB implementation
|
||||
|
||||
103
src/std-data/STDDataStream.cpp
Normal file
103
src/std-data/STDDataStream.cpp
Normal file
@ -0,0 +1,103 @@
|
||||
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
//
|
||||
// For full license terms please see the LICENSE file distributed with this
|
||||
// source code
|
||||
|
||||
#include "STDDataStream.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <execution>
|
||||
#include <numeric>
|
||||
|
||||
// Execution policy handed to every standard algorithm call in this file.
// There are three execution policies to choose from:
//   std::execution::seq
//   std::execution::par
//   std::execution::par_unseq   (selected)
// The policy is a file-local compile-time constant: nothing in this file
// modifies it, so it does not need to be a mutable global with external linkage.
static constexpr auto exe_policy = std::execution::par_unseq;
|
||||
|
||||
|
||||
template <class T>
|
||||
STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device)
|
||||
noexcept : array_size{ARRAY_SIZE}, a(array_size), b(array_size), c(array_size)
|
||||
{
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::init_arrays(T initA, T initB, T initC)
|
||||
{
|
||||
std::fill(exe_policy, a.begin(), a.end(), initA);
|
||||
std::fill(exe_policy, b.begin(), b.end(), initB);
|
||||
std::fill(exe_policy, c.begin(), c.end(), initC);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||
{
|
||||
h_a = a;
|
||||
h_b = b;
|
||||
h_c = c;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::copy()
|
||||
{
|
||||
// c[i] = a[i]
|
||||
std::copy(exe_policy, a.begin(), a.end(), c.begin());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::mul()
|
||||
{
|
||||
// b[i] = scalar * c[i];
|
||||
std::transform(exe_policy, c.begin(), c.end(), b.begin(), [scalar = startScalar](T ci){ return scalar*ci; });
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::add()
|
||||
{
|
||||
// c[i] = a[i] + b[i];
|
||||
std::transform(exe_policy, a.begin(), a.end(), b.begin(), c.begin(), std::plus<T>());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::triad()
|
||||
{
|
||||
// a[i] = b[i] + scalar * c[i];
|
||||
std::transform(exe_policy, b.begin(), b.end(), c.begin(), a.begin(), [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; });
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDDataStream<T>::nstream()
|
||||
{
|
||||
// a[i] += b[i] + scalar * c[i];
|
||||
// Need to do in two stages with C++11 STL.
|
||||
// 1: a[i] += b[i]
|
||||
// 2: a[i] += scalar * c[i];
|
||||
std::transform(exe_policy, a.begin(), a.end(), b.begin(), a.begin(), [](T ai, T bi){ return ai + bi; });
|
||||
std::transform(exe_policy, a.begin(), a.end(), c.begin(), a.begin(), [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; });
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
T STDDataStream<T>::dot()
|
||||
{
|
||||
// sum = 0; sum += a[i]*b[i]; return sum;
|
||||
return std::transform_reduce(exe_policy, a.begin(), a.end(), b.begin(), 0.0);
|
||||
}
|
||||
|
||||
// Prints a fixed notice to standard output: this implementation cannot
// enumerate devices.
void listDevices(void)
{
  std::cout << "Listing devices is not supported by the Parallel STL" << std::endl;
}
|
||||
|
||||
// Returns a fixed placeholder string; the device index argument is ignored.
std::string getDeviceName(const int)
{
  return std::string("Device name unavailable");
}
|
||||
|
||||
// Returns a fixed placeholder string; the device index argument is ignored.
std::string getDeviceDriver(const int)
{
  return std::string("Device driver unavailable");
}
|
||||
// Explicit instantiations: the member functions are defined in this source
// file, so the float and double versions are generated here.
template class STDDataStream<float>;
template class STDDataStream<double>;
|
||||
|
||||
42
src/std-data/STDDataStream.h
Normal file
42
src/std-data/STDDataStream.h
Normal file
@ -0,0 +1,42 @@
|
||||
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
//
|
||||
// For full license terms please see the LICENSE file distributed with this
|
||||
// source code
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include "Stream.h"
|
||||
|
||||
#define IMPLEMENTATION_STRING "STD (data-oriented)"
|
||||
|
||||
|
||||
template <class T>
|
||||
class STDDataStream : public Stream<T>
|
||||
{
|
||||
protected:
|
||||
// Size of arrays
|
||||
int array_size;
|
||||
|
||||
// Device side pointers
|
||||
std::vector<T> a;
|
||||
std::vector<T> b;
|
||||
std::vector<T> c;
|
||||
|
||||
|
||||
public:
|
||||
STDDataStream(const int, int) noexcept;
|
||||
~STDDataStream() = default;
|
||||
|
||||
virtual void copy() override;
|
||||
virtual void add() override;
|
||||
virtual void mul() override;
|
||||
virtual void triad() override;
|
||||
virtual void nstream() override;
|
||||
virtual T dot() override;
|
||||
|
||||
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||
};
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
// For full license terms please see the LICENSE file distributed with this
|
||||
// source code
|
||||
|
||||
#include "STDStream.h"
|
||||
#include "STDIndicesStream.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <execution>
|
||||
@ -16,13 +16,13 @@ auto exe_policy = std::execution::par_unseq;
|
||||
|
||||
|
||||
template <class T>
|
||||
STDStream<T>::STDStream(const int ARRAY_SIZE, int device)
|
||||
STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device)
|
||||
noexcept : array_size{ARRAY_SIZE}, range(0, array_size), a(array_size), b(array_size), c(array_size)
|
||||
{
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDStream<T>::init_arrays(T initA, T initB, T initC)
|
||||
void STDIndicesStream<T>::init_arrays(T initA, T initB, T initC)
|
||||
{
|
||||
std::fill(exe_policy, a.begin(), a.end(), initA);
|
||||
std::fill(exe_policy, b.begin(), b.end(), initB);
|
||||
@ -30,7 +30,7 @@ void STDStream<T>::init_arrays(T initA, T initB, T initC)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||
void STDIndicesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||
{
|
||||
h_a = a;
|
||||
h_b = b;
|
||||
@ -38,14 +38,14 @@ void STDStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::ve
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDStream<T>::copy()
|
||||
void STDIndicesStream<T>::copy()
|
||||
{
|
||||
// c[i] = a[i]
|
||||
std::copy(exe_policy, a.begin(), a.end(), c.begin());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDStream<T>::mul()
|
||||
void STDIndicesStream<T>::mul()
|
||||
{
|
||||
// b[i] = scalar * c[i];
|
||||
std::transform(exe_policy, range.begin(), range.end(), b.begin(), [&, scalar = startScalar](int i) {
|
||||
@ -54,7 +54,7 @@ void STDStream<T>::mul()
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDStream<T>::add()
|
||||
void STDIndicesStream<T>::add()
|
||||
{
|
||||
// c[i] = a[i] + b[i];
|
||||
std::transform(exe_policy, range.begin(), range.end(), c.begin(), [&](int i) {
|
||||
@ -63,7 +63,7 @@ void STDStream<T>::add()
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDStream<T>::triad()
|
||||
void STDIndicesStream<T>::triad()
|
||||
{
|
||||
// a[i] = b[i] + scalar * c[i];
|
||||
std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) {
|
||||
@ -72,7 +72,7 @@ void STDStream<T>::triad()
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STDStream<T>::nstream()
|
||||
void STDIndicesStream<T>::nstream()
|
||||
{
|
||||
// a[i] += b[i] + scalar * c[i];
|
||||
// Need to do in two stages with C++11 STL.
|
||||
@ -85,7 +85,7 @@ void STDStream<T>::nstream()
|
||||
|
||||
|
||||
template <class T>
|
||||
T STDStream<T>::dot()
|
||||
T STDIndicesStream<T>::dot()
|
||||
{
|
||||
// sum = 0; sum += a[i]*b[i]; return sum;
|
||||
return std::transform_reduce(exe_policy, a.begin(), a.end(), b.begin(), 0.0);
|
||||
@ -105,6 +105,6 @@ std::string getDeviceDriver(const int)
|
||||
{
|
||||
return std::string("Device driver unavailable");
|
||||
}
|
||||
template class STDStream<float>;
|
||||
template class STDStream<double>;
|
||||
template class STDIndicesStream<float>;
|
||||
template class STDIndicesStream<double>;
|
||||
|
||||
@ -9,9 +9,9 @@
|
||||
#include <stdexcept>
|
||||
#include "Stream.h"
|
||||
|
||||
#define IMPLEMENTATION_STRING "STD"
|
||||
#define IMPLEMENTATION_STRING "STD (index-oriented)"
|
||||
|
||||
template <typename N = size_t>
|
||||
template <typename N>
|
||||
class ranged {
|
||||
N from, to;
|
||||
public:
|
||||
@ -22,13 +22,18 @@ public:
|
||||
using difference_type = N;
|
||||
using value_type = N;
|
||||
using pointer = const N*;
|
||||
using reference = N&;
|
||||
using reference = const N&;
|
||||
using iterator_category = std::random_access_iterator_tag;
|
||||
explicit iterator(N _num = 0) : num(_num) {}
|
||||
|
||||
iterator& operator++() { num++; return *this; }
|
||||
iterator operator++(int) { iterator retval = *this; ++(*this); return retval; }
|
||||
iterator operator+(const value_type v) const { return iterator(num + v); }
|
||||
|
||||
bool operator==(iterator other) const { return num == other.num; }
|
||||
// Inequality is the negation of operator==; writing `*this != other` here would call this operator again and recurse without end.
bool operator!=(iterator other) const { return !(*this == other); }
|
||||
bool operator<(iterator other) const { return num < other.num; }
|
||||
|
||||
reference operator*() const { return num;}
|
||||
difference_type operator-(const iterator &it) const { return num - it.num; }
|
||||
value_type operator[](const difference_type &i) const { return num + i; }
|
||||
@ -39,7 +44,7 @@ public:
|
||||
};
|
||||
|
||||
template <class T>
|
||||
class STDStream : public Stream<T>
|
||||
class STDIndicesStream : public Stream<T>
|
||||
{
|
||||
protected:
|
||||
// Size of arrays
|
||||
@ -55,8 +60,8 @@ class STDStream : public Stream<T>
|
||||
|
||||
|
||||
public:
|
||||
STDStream(const int, int) noexcept;
|
||||
~STDStream() = default;
|
||||
STDIndicesStream(const int, int) noexcept;
|
||||
~STDIndicesStream() = default;
|
||||
|
||||
virtual void copy() override;
|
||||
virtual void add() override;
|
||||
33
src/std-indices/model.cmake
Normal file
33
src/std-indices/model.cmake
Normal file
@ -0,0 +1,33 @@
|
||||
|
||||
# Optional flag selecting the C++ compiler for this model.
# NOTE(review): the last argument ("c++") is presumably the default value -
# confirm against the definition of register_flag_optional.
register_flag_optional(CMAKE_CXX_COMPILER
        "Any CXX compiler that is supported by CMake detection"
        "c++")
|
||||
|
||||
# Optional flag enabling NVHPC `-stdpar` GPU offload; the value is forwarded to `-gpu=` by setup().
# The continuation lines of the description are kept flush-left because any
# leading whitespace inside the quotes would become part of the string.
register_flag_optional(NVHPC_OFFLOAD
        "Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK.
The values are Nvidia architectures in ccXY format and will be passed in via `-gpu=` (e.g. `cc70`)

Possible values are:
cc35 - Compile for compute capability 3.5
cc50 - Compile for compute capability 5.0
cc60 - Compile for compute capability 6.0
cc62 - Compile for compute capability 6.2
cc70 - Compile for compute capability 7.0
cc72 - Compile for compute capability 7.2
cc75 - Compile for compute capability 7.5
cc80 - Compile for compute capability 8.0
ccall - Compile for all supported compute capabilities"
        "")
|
||||
|
||||
# Per-model setup: selects C++17 and, when NVHPC_OFFLOAD is set, adds the
# NVHPC offload flags to both the compile and the link step.
macro(setup)
    set(CMAKE_CXX_STANDARD 17)

    if (NVHPC_OFFLOAD)
        set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
        # propagate flags to linker so that it links with the gpu stuff as well
        register_append_cxx_flags(ANY ${NVHPC_FLAGS})
        register_append_link_flags(${NVHPC_FLAGS})
    endif ()

endmacro()
|
||||
@ -4,14 +4,14 @@
|
||||
// For full license terms please see the LICENSE file distributed with this
|
||||
// source code
|
||||
|
||||
#include "STD20Stream.hpp"
|
||||
#include "STDRangesStream.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <execution>
|
||||
#include <ranges>
|
||||
|
||||
template <class T>
|
||||
STD20Stream<T>::STD20Stream(const int ARRAY_SIZE, int device)
|
||||
STDRangesStream<T>::STDRangesStream(const int ARRAY_SIZE, int device)
|
||||
: array_size{ARRAY_SIZE}
|
||||
{
|
||||
a = std::vector<T>(array_size);
|
||||
@ -20,7 +20,7 @@ STD20Stream<T>::STD20Stream(const int ARRAY_SIZE, int device)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STD20Stream<T>::init_arrays(T initA, T initB, T initC)
|
||||
void STDRangesStream<T>::init_arrays(T initA, T initB, T initC)
|
||||
{
|
||||
std::for_each_n(
|
||||
std::execution::par_unseq,
|
||||
@ -34,7 +34,7 @@ void STD20Stream<T>::init_arrays(T initA, T initB, T initC)
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STD20Stream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||
void STDRangesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||
{
|
||||
// Element-wise copy.
|
||||
h_a = a;
|
||||
@ -43,7 +43,7 @@ void STD20Stream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STD20Stream<T>::copy()
|
||||
void STDRangesStream<T>::copy()
|
||||
{
|
||||
std::for_each_n(
|
||||
std::execution::par_unseq,
|
||||
@ -55,7 +55,7 @@ void STD20Stream<T>::copy()
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STD20Stream<T>::mul()
|
||||
void STDRangesStream<T>::mul()
|
||||
{
|
||||
const T scalar = startScalar;
|
||||
|
||||
@ -69,7 +69,7 @@ void STD20Stream<T>::mul()
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STD20Stream<T>::add()
|
||||
void STDRangesStream<T>::add()
|
||||
{
|
||||
std::for_each_n(
|
||||
std::execution::par_unseq,
|
||||
@ -81,7 +81,7 @@ void STD20Stream<T>::add()
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STD20Stream<T>::triad()
|
||||
void STDRangesStream<T>::triad()
|
||||
{
|
||||
const T scalar = startScalar;
|
||||
|
||||
@ -95,7 +95,7 @@ void STD20Stream<T>::triad()
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void STD20Stream<T>::nstream()
|
||||
void STDRangesStream<T>::nstream()
|
||||
{
|
||||
const T scalar = startScalar;
|
||||
|
||||
@ -109,7 +109,7 @@ void STD20Stream<T>::nstream()
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T STD20Stream<T>::dot()
|
||||
T STDRangesStream<T>::dot()
|
||||
{
|
||||
// sum += a[i] * b[i];
|
||||
return
|
||||
@ -133,6 +133,6 @@ std::string getDeviceDriver(const int)
|
||||
return std::string("Device driver unavailable");
|
||||
}
|
||||
|
||||
template class STD20Stream<float>;
|
||||
template class STD20Stream<double>;
|
||||
template class STDRangesStream<float>;
|
||||
template class STDRangesStream<double>;
|
||||
|
||||
@ -11,10 +11,10 @@
|
||||
|
||||
#include "Stream.h"
|
||||
|
||||
#define IMPLEMENTATION_STRING "C++20"
|
||||
#define IMPLEMENTATION_STRING "STD C++ ranges"
|
||||
|
||||
template <class T>
|
||||
class STD20Stream : public Stream<T>
|
||||
class STDRangesStream : public Stream<T>
|
||||
{
|
||||
protected:
|
||||
// Size of arrays
|
||||
@ -26,8 +26,8 @@ class STD20Stream : public Stream<T>
|
||||
std::vector<T> c;
|
||||
|
||||
public:
|
||||
STD20Stream(const int, int);
|
||||
~STD20Stream() = default;
|
||||
STDRangesStream(const int, int);
|
||||
~STDRangesStream() = default;
|
||||
|
||||
virtual void copy() override;
|
||||
virtual void add() override;
|
||||
Loading…
Reference in New Issue
Block a user