diff --git a/CHANGELOG.md b/CHANGELOG.md
index 13de588..f343174 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,8 @@ All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
 ### Added
-- New implementation using the C++ parallel STL.
+- New implementation using the C++ parallel STL (C++17).
+- New implementation using C++20 range factories and `for_each_n`.
 - Compiler options for OpenMP and OpenACC GNU offloading to NVIDIA and AMD.
 - Compiler options for Arm Clang added to OpenMP and Kokkos.
 - Kokkos 3 build system (No code changes made).
diff --git a/STD20.make b/STD20.make
new file mode 100644
index 0000000..eced9f7
--- /dev/null
+++ b/STD20.make
@@ -0,0 +1,28 @@
+
+ifndef COMPILER
+define compiler_help
+Set COMPILER to change flags (defaulting to GNU).
+Available compilers are:
+  GNU
+
+endef
+$(info $(compiler_help))
+COMPILER=GNU
+endif
+
+COMPILER_GNU = g++
+CXX = $(COMPILER_$(COMPILER))
+
+# NOTE: libstdc++ implements the parallel algorithms on top of Intel TBB; if
+# linking fails with unresolved tbb symbols, pass EXTRA_FLAGS=-ltbb.
+FLAGS_GNU = -O3 -std=c++2a -march=native
+CXXFLAGS = $(FLAGS_$(COMPILER))
+
+
+std20-stream: main.cpp STD20Stream.cpp
+	$(CXX) -DSTD20 $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@
+
+.PHONY: clean
+clean:
+	rm -f std20-stream
+
diff --git a/STD20Stream.cpp b/STD20Stream.cpp
new file mode 100644
index 0000000..a7596f6
--- /dev/null
+++ b/STD20Stream.cpp
@@ -0,0 +1,148 @@
+// Copyright (c) 2020 Tom Deakin
+// University of Bristol HPC
+//
+// For full license terms please see the LICENSE file distributed with this
+// source code
+
+#include "STD20Stream.hpp"
+
+#include <algorithm>
+#include <execution>
+#include <numeric>
+#include <ranges>
+
+// NOTE(review): every kernel below indexes with an int produced by
+// std::views::iota(0) while array_size is unsigned int, so a size above
+// INT_MAX would overflow the index. iota_view iterators also report
+// input_iterator_tag as their C++17 category, whereas the parallel algorithm
+// overloads are specified for forward iterators; confirm that the target
+// standard library really runs these loops in parallel.
+
+// Allocates the three arrays with ARRAY_SIZE value-initialised elements each.
+// The device argument is not used by this implementation.
+template <class T>
+STD20Stream<T>::STD20Stream(const unsigned int ARRAY_SIZE, int device)
+  : array_size{ARRAY_SIZE},
+    a(ARRAY_SIZE),
+    b(ARRAY_SIZE),
+    c(ARRAY_SIZE)
+{
+}
+
+// Fills a, b and c with initA, initB and initC respectively.
+template <class T>
+void STD20Stream<T>::init_arrays(T initA, T initB, T initC)
+{
+  std::for_each_n(
+    std::execution::par_unseq,
+    std::views::iota(0).begin(), array_size, // loop range
+    [&] (int i) {
+      a[i] = initA;
+      b[i] = initB;
+      c[i] = initC;
+    }
+  );
+}
+
+// Copies the internal arrays into the caller's vectors.
+template <class T>
+void STD20Stream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
+{
+  // Whole-vector copy assignment; each destination takes its source's size.
+  h_a = a;
+  h_b = b;
+  h_c = c;
+}
+
+// Copy kernel: c[i] = a[i]
+template <class T>
+void STD20Stream<T>::copy()
+{
+  std::for_each_n(
+    std::execution::par_unseq,
+    std::views::iota(0).begin(), array_size,
+    [&] (int i) {
+      c[i] = a[i];
+    }
+  );
+}
+
+// Mul kernel: b[i] = startScalar * c[i]
+template <class T>
+void STD20Stream<T>::mul()
+{
+  const T scalar = startScalar;
+
+  std::for_each_n(
+    std::execution::par_unseq,
+    std::views::iota(0).begin(), array_size,
+    [&] (int i) {
+      b[i] = scalar * c[i];
+    }
+  );
+}
+
+// Add kernel: c[i] = a[i] + b[i]
+template <class T>
+void STD20Stream<T>::add()
+{
+  std::for_each_n(
+    std::execution::par_unseq,
+    std::views::iota(0).begin(), array_size,
+    [&] (int i) {
+      c[i] = a[i] + b[i];
+    }
+  );
+}
+
+// Triad kernel: a[i] = b[i] + startScalar * c[i]
+template <class T>
+void STD20Stream<T>::triad()
+{
+  const T scalar = startScalar;
+
+  std::for_each_n(
+    std::execution::par_unseq,
+    std::views::iota(0).begin(), array_size,
+    [&] (int i) {
+      a[i] = b[i] + scalar * c[i];
+    }
+  );
+}
+
+// Returns the dot product of a and b.
+template <class T>
+T STD20Stream<T>::dot()
+{
+  // sum += a[i] * b[i];
+  // The 0.0 seed makes transform_reduce return a double; it is converted to T
+  // on return.
+  return
+    std::transform_reduce(
+      std::execution::par_unseq,
+      a.begin(), a.end(), b.begin(), 0.0);
+}
+
+// Prints a fixed message; this implementation has no device list to show.
+void listDevices(void)
+{
+  std::cout << "C++20 does not expose devices" << std::endl;
+}
+
+// Returns a fixed placeholder string; the argument is ignored.
+std::string getDeviceName(const int)
+{
+  return std::string("Device name unavailable");
+}
+
+// Returns a fixed placeholder string; the argument is ignored.
+std::string getDeviceDriver(const int)
+{
+  return std::string("Device driver unavailable");
+}
+
+// NOTE(review): the instantiation arguments are not legible in this copy of
+// the patch; float and double are assumed here - confirm against main.cpp.
+template class STD20Stream<float>;
+template class STD20Stream<double>;
+
diff --git a/STD20Stream.hpp b/STD20Stream.hpp
new file mode 100644
index 0000000..db78784
--- /dev/null
+++ b/STD20Stream.hpp
@@ -0,0 +1,44 @@
+// Copyright (c) 2020 Tom Deakin
+// University of Bristol HPC
+//
+// For full license terms please see the LICENSE file distributed with this
+// source code
+
+#pragma once
+
+#include <iostream>
+#include <vector>
+
+#include "Stream.h"
+
+#define IMPLEMENTATION_STRING "C++20"
+
+// Stream implementation built on C++20 range factories and the parallel
+// standard algorithms, keeping its data in std::vector members.
+template <class T>
+class STD20Stream : public Stream<T>
+{
+  protected:
+    // Size of arrays
+    unsigned int array_size;
+
+    // Benchmark arrays, owned by this object
+    std::vector<T> a;
+    std::vector<T> b;
+    std::vector<T> c;
+
+  public:
+    STD20Stream(const unsigned int, int);
+    ~STD20Stream() = default;
+
+    virtual void copy() override;
+    virtual void add() override;
+    virtual void mul() override;
+    virtual void triad() override;
+    virtual T dot() override;
+
+    virtual void init_arrays(T initA, T initB, T initC) override;
+    virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
+
+};
+
diff --git a/main.cpp b/main.cpp
index c7e805b..2816009 100644
--- a/main.cpp
+++ b/main.cpp
@@ -23,6 +23,8 @@
 #include "CUDAStream.h"
 #elif defined(STD)
 #include "STDStream.h"
+#elif defined(STD20)
+#include "STD20Stream.hpp"
 #elif defined(HIP)
 #include "HIPStream.h"
 #elif defined(HC)
@@ -160,6 +162,10 @@ void run()
   // Use the STD implementation
   stream = new STDStream<T>(ARRAY_SIZE, deviceIndex);
 
+#elif defined(STD20)
+  // Use the C++20 implementation
+  stream = new STD20Stream<T>(ARRAY_SIZE, deviceIndex);
+
 #elif defined(ACC)
   // Use the OpenACC implementation
   stream = new ACCStream<T>(ARRAY_SIZE, deviceIndex);
@@ -368,6 +374,10 @@ void run_triad()
   // Use the STD implementation
   stream = new STDStream<T>(ARRAY_SIZE, deviceIndex);
 
+#elif defined(STD20)
+  // Use the C++20 implementation
+  stream = new STD20Stream<T>(ARRAY_SIZE, deviceIndex);
+
 #elif defined(SYCL)
   // Use the SYCL implementation
   stream = new SYCLStream<T>(ARRAY_SIZE, deviceIndex);