diff --git a/CMakeLists.txt b/CMakeLists.txt index a0e10a8..ecf2fd4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(BabelStream VERSION 3.5 LANGUAGES CXX) +# uncomment for debugging build issues: #set(CMAKE_VERBOSE_MAKEFILE ON) # some nicer defaults for standard C++ @@ -23,45 +24,6 @@ macro(setup_opencl_header_includes) endif () endmacro() -#set(MODEL SYCL) -#set(SYCL_COMPILER COMPUTECPP) -#set(SYCL_COMPILER_DIR /home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu) -#set(MODEL RAJA) -#set(RAJA_IN_TREE /home/tom/Downloads/RAJA-v0.13.0/) -#set(ENABLE_CUDA ON) -#set(TARGET NVIDIA) -#set(CUDA_TOOLKIT_ROOT_DIR /opt/cuda-11.2) -#set(CUDA_ARCH sm_70) -#set(BLT_DIR /home/tom/Downloads/blt-0.3.6/) - -#set(MODEL STD) -#set(ARCH cc70) -#set(CXX_EXTRA_FLAGS -v) - -#set(MODEL CUDA) -#set(ARCH sm_70) -#set(CMAKE_CUDA_COMPILER /opt/cuda-11.2/bin/nvcc) - -#set(MODEL OCL) -#set(OpenCL_LIBRARY /opt/rocm-4.0.0/opencl/lib/libOpenCL.so) -#set(OpenCL_INCLUDE_DIR /opt/rocm-4.0.0/opencl/lib) -#set(RELEASE_FLAGS -Ofast) -#set(CXX_EXTRA_FLAGS -O2) - -#set(CMAKE_CXX_COMPILER /usr/lib/aomp/bin/clang++) -#set(MODEL omp) -##set(OFFLOAD "AMD:gfx803") -#set(OFFLOAD "NVIDIA:sm_35") -#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/) - -#set(OFFLOAD "AMD:_70") -#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/ --gcc-toolchain=/home/tom/spack/opt/spack/linux-fedora33-zen2/gcc-10.2.1/gcc-8.3.0-latmjo2hl2yv53255xkwko7k3y7bx2vv) -#set(CXX_EXTRA_LINKER_FLAGS ) -#set(MODEL HIP) - -#set(MODEL KOKKOS) -#set(KOKKOS_IN_TREE /home/tom/Downloads/kokkos-3.3.00/) - # the final executable name set(EXE_NAME babelstream) @@ -115,8 +77,9 @@ include(cmake/register_models.cmake) # register out models register_model(omp OMP OMPStream.cpp) register_model(ocl OCL OCLStream.cpp) -register_model(std STD STDStream.cpp) -register_model(std20 STD20 STD20Stream.cpp) +register_model(std-data STD_DATA STDDataStream.cpp) 
+register_model(std-indices STD_INDICES STDIndicesStream.cpp) +register_model(std-ranges STD_RANGES STDRangesStream.cpp) register_model(hip HIP HIPStream.cpp) register_model(cuda CUDA CUDAStream.cu) register_model(kokkos KOKKOS KokkosStream.cpp) diff --git a/src/ci-test-compile.sh b/src/ci-test-compile.sh index 3f54aaf..9388643 100755 --- a/src/ci-test-compile.sh +++ b/src/ci-test-compile.sh @@ -115,7 +115,7 @@ run_build() { # GCC_STD_PAR_LIB="tbb" # CLANG_STD_PAR_LIB="tbb" # GCC_OMP_OFFLOAD_AMD=false -# GCC_OMP_OFFLOAD_NVIDIA=true +# GCC_OMP_OFFLOAD_NVIDIA=false # CLANG_OMP_OFFLOAD_AMD=false # CLANG_OMP_OFFLOAD_NVIDIA=false ### @@ -136,8 +136,9 @@ build_gcc() { fi # some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here - run_build $name "${GCC_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" - run_build $name "${GCC_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" + run_build $name "${GCC_CXX:?}" std-data "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" + run_build $name "${GCC_CXX:?}" std-indices "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" + run_build $name "${GCC_CXX:?}" std-ranges "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" run_build $name "${GCC_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB" run_build $name "${GCC_CXX:?}" tbb "$cxx" # build TBB again with the system TBB @@ -211,7 +212,8 @@ build_clang() { run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" - run_build $name "${CLANG_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" + run_build $name "${CLANG_CXX:?}" std-data "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" + run_build $name "${CLANG_CXX:?}" std-indices "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # 
run_build $name "${LANG_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" @@ -219,7 +221,8 @@ build_clang() { run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" - run_build $name "${CLANG_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" + run_build $name "${CLANG_CXX:?}" std-data "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" + run_build $name "${CLANG_CXX:?}" std-indices "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # run_build $name "${LANG_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported run_build $name "${CLANG_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB" @@ -232,7 +235,8 @@ build_clang() { build_nvhpc() { local name="nvhpc_build" local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}" - run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" + run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" + run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen" } diff --git a/src/main.cpp b/src/main.cpp index 5a01b74..13a0021 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -21,10 +21,12 @@ #if defined(CUDA) #include "CUDAStream.h" -#elif defined(STD) -#include "STDStream.h" -#elif defined(STD20) -#include "STD20Stream.hpp" +#elif defined(STD_DATA) +#include "STDDataStream.h" +#elif 
defined(STD_INDICES) +#include "STDIndicesStream.h" +#elif defined(STD_RANGES) +#include "STDRangesStream.hpp" #elif defined(TBB) #include "TBBStream.hpp" #elif defined(THRUST) @@ -264,13 +266,17 @@ void run() // Use the Kokkos implementation stream = new KokkosStream(ARRAY_SIZE, deviceIndex); -#elif defined(STD) - // Use the STD implementation - stream = new STDStream(ARRAY_SIZE, deviceIndex); +#elif defined(STD_DATA) + // Use the C++ STD data-oriented implementation + stream = new STDDataStream(ARRAY_SIZE, deviceIndex); -#elif defined(STD20) - // Use the C++20 implementation - stream = new STD20Stream(ARRAY_SIZE, deviceIndex); +#elif defined(STD_INDICES) + // Use the C++ STD index-oriented implementation + stream = new STDIndicesStream(ARRAY_SIZE, deviceIndex); + +#elif defined(STD_RANGES) + // Use the C++ STD ranges implementation + stream = new STDRangesStream(ARRAY_SIZE, deviceIndex); #elif defined(TBB) // Use the C++20 implementation diff --git a/src/std-data/STDDataStream.cpp b/src/std-data/STDDataStream.cpp new file mode 100644 index 0000000..343e247 --- /dev/null +++ b/src/std-data/STDDataStream.cpp @@ -0,0 +1,104 @@ +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+// Updated 2021 by University of Bristol +// +// For full license terms please see the LICENSE file distributed with this +// source code + +#include "STDDataStream.h" + +#include +#include +#include + +// There are three execution policies: +// auto exe_policy = std::execution::seq; +// auto exe_policy = std::execution::par; +auto exe_policy = std::execution::par_unseq; + + +template +STDDataStream::STDDataStream(const int ARRAY_SIZE, int device) + noexcept : array_size{ARRAY_SIZE}, a(array_size), b(array_size), c(array_size) +{ +} + +template +void STDDataStream::init_arrays(T initA, T initB, T initC) +{ + std::fill(exe_policy, a.begin(), a.end(), initA); + std::fill(exe_policy, b.begin(), b.end(), initB); + std::fill(exe_policy, c.begin(), c.end(), initC); +} + +template +void STDDataStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) +{ + h_a = a; + h_b = b; + h_c = c; +} + +template +void STDDataStream::copy() +{ + // c[i] = a[i] + std::copy(exe_policy, a.begin(), a.end(), c.begin()); +} + +template +void STDDataStream::mul() +{ + // b[i] = scalar * c[i]; + std::transform(exe_policy, c.begin(), c.end(), b.begin(), [scalar = startScalar](T ci){ return scalar*ci; }); +} + +template +void STDDataStream::add() +{ + // c[i] = a[i] + b[i]; + std::transform(exe_policy, a.begin(), a.end(), b.begin(), c.begin(), std::plus()); +} + +template +void STDDataStream::triad() +{ + // a[i] = b[i] + scalar * c[i]; + std::transform(exe_policy, b.begin(), b.end(), c.begin(), a.begin(), [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; }); +} + +template +void STDDataStream::nstream() +{ + // a[i] += b[i] + scalar * c[i]; + // Need to do in two stages with C++11 STL. 
+ // 1: a[i] += b[i] + // 2: a[i] += scalar * c[i]; + std::transform(exe_policy, a.begin(), a.end(), b.begin(), a.begin(), [](T ai, T bi){ return ai + bi; }); + std::transform(exe_policy, a.begin(), a.end(), c.begin(), a.begin(), [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; }); +} + + +template +T STDDataStream::dot() +{ + // sum = 0; sum += a[i]*b[i]; return sum; + return std::transform_reduce(exe_policy, a.begin(), a.end(), b.begin(), 0.0); +} + +void listDevices(void) +{ + std::cout << "Listing devices is not supported by the Parallel STL" << std::endl; +} + +std::string getDeviceName(const int) +{ + return std::string("Device name unavailable"); +} + +std::string getDeviceDriver(const int) +{ + return std::string("Device driver unavailable"); +} +template class STDDataStream; +template class STDDataStream; + diff --git a/src/std/STDStream.h b/src/std-data/STDDataStream.h similarity index 65% rename from src/std/STDStream.h rename to src/std-data/STDDataStream.h index 2249812..741fd6c 100644 --- a/src/std/STDStream.h +++ b/src/std-data/STDDataStream.h @@ -1,4 +1,5 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. 
+// Updated 2021 by University of Bristol // // For full license terms please see the LICENSE file distributed with this // source code @@ -9,23 +10,25 @@ #include #include "Stream.h" -#define IMPLEMENTATION_STRING "STD" +#define IMPLEMENTATION_STRING "STD (data-oriented)" + template -class STDStream : public Stream +class STDDataStream : public Stream { protected: // Size of arrays int array_size; // Device side pointers - T *a; - T *b; - T *c; + std::vector a; + std::vector b; + std::vector c; + public: - STDStream(const int, int) noexcept; - ~STDStream(); + STDDataStream(const int, int) noexcept; + ~STDDataStream() = default; virtual void copy() override; virtual void add() override; diff --git a/src/std/model.cmake b/src/std-data/model.cmake similarity index 100% rename from src/std/model.cmake rename to src/std-data/model.cmake diff --git a/src/std-indices/STDIndicesStream.cpp b/src/std-indices/STDIndicesStream.cpp new file mode 100644 index 0000000..2221f90 --- /dev/null +++ b/src/std-indices/STDIndicesStream.cpp @@ -0,0 +1,111 @@ +// Copyright (c) 2021 Tom Deakin and Tom Lin +// University of Bristol HPC +// +// For full license terms please see the LICENSE file distributed with this +// source code + +#include "STDIndicesStream.h" + +#include +#include +#include + +// There are three execution policies: +// auto exe_policy = std::execution::seq; +// auto exe_policy = std::execution::par; +auto exe_policy = std::execution::par_unseq; + + +template +STDIndicesStream::STDIndicesStream(const int ARRAY_SIZE, int device) + noexcept : array_size{ARRAY_SIZE}, range(0, array_size), a(array_size), b(array_size), c(array_size) +{ +} + +template +void STDIndicesStream::init_arrays(T initA, T initB, T initC) +{ + std::fill(exe_policy, a.begin(), a.end(), initA); + std::fill(exe_policy, b.begin(), b.end(), initB); + std::fill(exe_policy, c.begin(), c.end(), initC); +} + +template +void STDIndicesStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) +{ 
+ h_a = a; + h_b = b; + h_c = c; +} + +template +void STDIndicesStream::copy() +{ + // c[i] = a[i] + std::copy(exe_policy, a.begin(), a.end(), c.begin()); +} + +template +void STDIndicesStream::mul() +{ + // b[i] = scalar * c[i]; + std::transform(exe_policy, range.begin(), range.end(), b.begin(), [&, scalar = startScalar](int i) { + return scalar * c[i]; + }); +} + +template +void STDIndicesStream::add() +{ + // c[i] = a[i] + b[i]; + std::transform(exe_policy, range.begin(), range.end(), c.begin(), [&](int i) { + return a[i] + b[i]; + }); +} + +template +void STDIndicesStream::triad() +{ + // a[i] = b[i] + scalar * c[i]; + std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) { + return b[i] + scalar * c[i]; + }); +} + +template +void STDIndicesStream::nstream() +{ + // a[i] += b[i] + scalar * c[i]; + // Done in a single pass: the index-based lambda reads a[i], b[i] and c[i] directly, + // so there is no need for an intermediate a[i] += b[i] stage + // followed by a separate a[i] += scalar * c[i] stage. + std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) { + return a[i] + b[i] + scalar * c[i]; + }); +} + + +template +T STDIndicesStream::dot() +{ + // sum = 0; sum += a[i]*b[i]; return sum; + return std::transform_reduce(exe_policy, a.begin(), a.end(), b.begin(), 0.0); +} + +void listDevices(void) +{ + std::cout << "Listing devices is not supported by the Parallel STL" << std::endl; +} + +std::string getDeviceName(const int) +{ + return std::string("Device name unavailable"); +} + +std::string getDeviceDriver(const int) +{ + return std::string("Device driver unavailable"); +} +template class STDIndicesStream; +template class STDIndicesStream; + diff --git a/src/std-indices/STDIndicesStream.h b/src/std-indices/STDIndicesStream.h new file mode 100644 index 0000000..bc068aa --- /dev/null +++ b/src/std-indices/STDIndicesStream.h @@ -0,0 +1,81 @@ +// Copyright (c) 2021 Tom Deakin and Tom Lin +// University of Bristol HPC +// +// For full license terms please see the LICENSE file 
distributed with this +// source code + +#pragma once + +#include +#include +#include "Stream.h" + +#define IMPLEMENTATION_STRING "STD (index-oriented)" + + +// A lightweight counting iterator over the half-open index range [from, to), used by the STL algorithms +// NB: C++ <= 17 doesn't have this built-in, and it's only added later in ranges-v3 (C++2a) which this +// implementation doesn't target +template +class ranged { + N from, to; +public: + ranged(N from, N to ): from(from), to(to) {} + class iterator { + N num; + public: + using difference_type = N; + using value_type = N; + using pointer = const N*; + using reference = const N&; + using iterator_category = std::random_access_iterator_tag; + explicit iterator(N _num = 0) : num(_num) {} + + iterator& operator++() { num++; return *this; } + iterator operator++(int) { iterator retval = *this; ++(*this); return retval; } + iterator operator+(const value_type v) const { return iterator(num + v); } + + bool operator==(iterator other) const { return num == other.num; } + bool operator!=(iterator other) const { return !(*this == other); } // defined via ==; calling != here would recurse forever + bool operator<(iterator other) const { return num < other.num; } + + reference operator*() const { return num;} + difference_type operator-(const iterator &it) const { return num - it.num; } + value_type operator[](const difference_type &i) const { return num + i; } + + }; + iterator begin() { return iterator(from); } + iterator end() { return iterator(to); } // one past the last index, so ranged(0, n) yields exactly 0..n-1
+}; + +template +class STDIndicesStream : public Stream +{ + protected: + // Size of arrays + int array_size; + + // induction range + ranged range; + + // Device side pointers + std::vector a; + std::vector b; + std::vector c; + + + public: + STDIndicesStream(const int, int) noexcept; + ~STDIndicesStream() = default; + + virtual void copy() override; + virtual void add() override; + virtual void mul() override; + virtual void triad() override; + virtual void nstream() override; + virtual T dot() override; + + virtual void init_arrays(T initA, T initB, T initC) override; + virtual void read_arrays(std::vector& a, std::vector& b, std::vector& c) override; +}; + diff --git a/src/std-indices/model.cmake b/src/std-indices/model.cmake new file mode 100644 index 0000000..ef69f30 --- /dev/null +++ b/src/std-indices/model.cmake @@ -0,0 +1,33 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that is supported by CMake detection" + "c++") + +register_flag_optional(NVHPC_OFFLOAD + "Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK. 
+ The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`) + + Possible values are: + cc35 - Compile for compute capability 3.5 + cc50 - Compile for compute capability 5.0 + cc60 - Compile for compute capability 6.0 + cc62 - Compile for compute capability 6.2 + cc70 - Compile for compute capability 7.0 + cc72 - Compile for compute capability 7.2 + cc75 - Compile for compute capability 7.5 + cc80 - Compile for compute capability 8.0 + ccall - Compile for all supported compute capabilities" + "") + +macro(setup) + set(CMAKE_CXX_STANDARD 17) + + if (NVHPC_OFFLOAD) + set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD}) + # propagate flags to linker so that it links with the gpu stuff as well + register_append_cxx_flags(ANY ${NVHPC_FLAGS}) + register_append_link_flags(${NVHPC_FLAGS}) + endif () + + +endmacro() diff --git a/src/std20/STD20Stream.cpp b/src/std-ranges/STDRangesStream.cpp similarity index 79% rename from src/std20/STD20Stream.cpp rename to src/std-ranges/STDRangesStream.cpp index 8290033..de61528 100644 --- a/src/std20/STD20Stream.cpp +++ b/src/std-ranges/STDRangesStream.cpp @@ -4,14 +4,14 @@ // For full license terms please see the LICENSE file distributed with this // source code -#include "STD20Stream.hpp" +#include "STDRangesStream.hpp" #include #include #include template -STD20Stream::STD20Stream(const int ARRAY_SIZE, int device) +STDRangesStream::STDRangesStream(const int ARRAY_SIZE, int device) : array_size{ARRAY_SIZE} { a = std::vector(array_size); @@ -20,7 +20,7 @@ STD20Stream::STD20Stream(const int ARRAY_SIZE, int device) } template -void STD20Stream::init_arrays(T initA, T initB, T initC) +void STDRangesStream::init_arrays(T initA, T initB, T initC) { std::for_each_n( std::execution::par_unseq, @@ -34,7 +34,7 @@ void STD20Stream::init_arrays(T initA, T initB, T initC) } template -void STD20Stream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) +void STDRangesStream::read_arrays(std::vector& h_a, 
std::vector& h_b, std::vector& h_c) { // Element-wise copy. h_a = a; @@ -43,7 +43,7 @@ void STD20Stream::read_arrays(std::vector& h_a, std::vector& h_b, std:: } template -void STD20Stream::copy() +void STDRangesStream::copy() { std::for_each_n( std::execution::par_unseq, @@ -55,7 +55,7 @@ void STD20Stream::copy() } template -void STD20Stream::mul() +void STDRangesStream::mul() { const T scalar = startScalar; @@ -69,7 +69,7 @@ void STD20Stream::mul() } template -void STD20Stream::add() +void STDRangesStream::add() { std::for_each_n( std::execution::par_unseq, @@ -81,7 +81,7 @@ void STD20Stream::add() } template -void STD20Stream::triad() +void STDRangesStream::triad() { const T scalar = startScalar; @@ -95,7 +95,7 @@ void STD20Stream::triad() } template -void STD20Stream::nstream() +void STDRangesStream::nstream() { const T scalar = startScalar; @@ -109,7 +109,7 @@ void STD20Stream::nstream() } template -T STD20Stream::dot() +T STDRangesStream::dot() { // sum += a[i] * b[i]; return @@ -133,6 +133,6 @@ std::string getDeviceDriver(const int) return std::string("Device driver unavailable"); } -template class STD20Stream; -template class STD20Stream; +template class STDRangesStream; +template class STDRangesStream; diff --git a/src/std20/STD20Stream.hpp b/src/std-ranges/STDRangesStream.hpp similarity index 83% rename from src/std20/STD20Stream.hpp rename to src/std-ranges/STDRangesStream.hpp index e5daa3c..890e893 100644 --- a/src/std20/STD20Stream.hpp +++ b/src/std-ranges/STDRangesStream.hpp @@ -11,10 +11,10 @@ #include "Stream.h" -#define IMPLEMENTATION_STRING "C++20" +#define IMPLEMENTATION_STRING "STD C++ ranges" template -class STD20Stream : public Stream +class STDRangesStream : public Stream { protected: // Size of arrays @@ -26,8 +26,8 @@ class STD20Stream : public Stream std::vector c; public: - STD20Stream(const int, int); - ~STD20Stream() = default; + STDRangesStream(const int, int); + ~STDRangesStream() = default; virtual void copy() override; virtual void 
add() override; diff --git a/src/std20/model.cmake b/src/std-ranges/model.cmake similarity index 100% rename from src/std20/model.cmake rename to src/std-ranges/model.cmake diff --git a/src/std/STDStream.cpp b/src/std/STDStream.cpp deleted file mode 100644 index 30ad420..0000000 --- a/src/std/STDStream.cpp +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// For full license terms please see the LICENSE file distributed with this -// source code - -#include "STDStream.h" - -#include -#include -#include - -// There are three execution policies: -// auto exe_policy = std::execution::seq; -// auto exe_policy = std::execution::par; -auto exe_policy = std::execution::par_unseq; - -template -STDStream::STDStream(const int ARRAY_SIZE, int device) - noexcept : array_size{ARRAY_SIZE}, a{new T[array_size]}, b{new T[array_size]}, c{new T[array_size]} -{ -} - -template -STDStream::~STDStream() -{ - delete[] a; - delete[] b; - delete[] c; -} - -template -void STDStream::init_arrays(T initA, T initB, T initC) -{ - std::fill(exe_policy, a, a+array_size, initA); - std::fill(exe_policy, b, b+array_size, initB); - std::fill(exe_policy, c, c+array_size, initC); -} - -template -void STDStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) -{ - std::copy(exe_policy, a, a+array_size, h_a.data()); - std::copy(exe_policy, b, b+array_size, h_b.data()); - std::copy(exe_policy, c, c+array_size, h_c.data()); -} - -template -void STDStream::copy() -{ - // c[i] = a[i] - std::copy(exe_policy, a, a+array_size, c) ; -} - -template -void STDStream::mul() -{ - // b[i] = scalar * c[i]; - std::transform(exe_policy, c, c+array_size, b, [](T ci){ return startScalar*ci; }); -} - -template -void STDStream::add() -{ - // c[i] = a[i] + b[i]; - std::transform(exe_policy, a, a+array_size, b, c, std::plus()); -} - -template -void STDStream::triad() -{ - // a[i] = b[i] + scalar * c[i]; - std::transform(exe_policy, b, b+array_size, c, a, 
[](T bi, T ci){ return bi+startScalar*ci; }); -} - -template -void STDStream::nstream() -{ - // a[i] += b[i] + scalar * c[i]; - // Need to do in two stages with C++11 STL. - // 1: a[i] += b[i] - // 2: a[i] += scalar * c[i]; - std::transform(exe_policy, a, a+array_size, b, a, [](T ai, T bi){ return ai + bi; }); - std::transform(exe_policy, a, a+array_size, c, a, [](T ai, T ci){ return ai + startScalar*ci; }); -} - -template -T STDStream::dot() -{ - // sum = 0; sum += a[i]*b[i]; return sum; - return std::transform_reduce(exe_policy, a, a+array_size, b, 0.0); -} - -void listDevices(void) -{ - std::cout << "Listing devices is not supported by the Parallel STL" << std::endl; -} - -std::string getDeviceName(const int) -{ - return std::string("Device name unavailable"); -} - -std::string getDeviceDriver(const int) -{ - return std::string("Device driver unavailable"); -} -template class STDStream; -template class STDStream; -