parent
ffa221fd35
commit
e8fb3a6be4
@ -4,7 +4,8 @@ All notable changes to this project will be documented in this file.
|
|||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
- New implementation using the C++ parallel STL.
|
- New implementation using the C++ parallel STL (C++17).
|
||||||
|
- New implementation using C++20 range factories and `for_each_n`.
|
||||||
- Compiler options for OpenMP and OpenACC GNU offloading to NVIDIA and AMD.
|
- Compiler options for OpenMP and OpenACC GNU offloading to NVIDIA and AMD.
|
||||||
- Compiler options for Arm Clang added to OpenMP and Kokkos.
|
- Compiler options for Arm Clang added to OpenMP and Kokkos.
|
||||||
- Kokkos 3 build system (No code changes made).
|
- Kokkos 3 build system (No code changes made).
|
||||||
|
|||||||
26
STD20.make
Normal file
26
STD20.make
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
|
||||||
|
# Build rules for the C++20 variant: main.cpp plus STD20Stream.cpp compiled
# with -DSTD20 into the std20-stream executable.

# When COMPILER is not set, print the list of choices and fall back to GNU.
ifndef COMPILER
define compiler_help
Set COMPILER to change flags (defaulting to GNU).
Available compilers are:
GNU

endef
$(info $(compiler_help))
COMPILER = GNU
endif

# Per-compiler settings; the active pair is selected through $(COMPILER).
FLAGS_GNU    = -O3 -std=c++2a -march=native
COMPILER_GNU = g++

CXXFLAGS = $(FLAGS_$(COMPILER))
CXX      = $(COMPILER_$(COMPILER))

# EXTRA_FLAGS is placed after the sources so it can carry additional options
# supplied on the make command line.
std20-stream: main.cpp STD20Stream.cpp
	$(CXX) -DSTD20 $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@

# Remove the built executable.
clean:
	rm -f std20-stream

.PHONY: clean
|
||||||
|
|
||||||
124
STD20Stream.cpp
Normal file
124
STD20Stream.cpp
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
// Copyright (c) 2020 Tom Deakin
|
||||||
|
// University of Bristol HPC
|
||||||
|
//
|
||||||
|
// For full license terms please see the LICENSE file distributed with this
|
||||||
|
// source code
|
||||||
|
|
||||||
|
#include "STD20Stream.hpp"

#include <algorithm>
#include <execution>
#include <numeric>
#include <ranges>
#include <string>
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
STD20Stream<T>::STD20Stream(const unsigned int ARRAY_SIZE, int device)
|
||||||
|
: array_size{ARRAY_SIZE}
|
||||||
|
{
|
||||||
|
a = std::vector<T>(array_size);
|
||||||
|
b = std::vector<T>(array_size);
|
||||||
|
c = std::vector<T>(array_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void STD20Stream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
|
{
|
||||||
|
std::for_each_n(
|
||||||
|
std::execution::par_unseq,
|
||||||
|
std::views::iota(0).begin(), array_size, // loop range
|
||||||
|
[&] (int i) {
|
||||||
|
a[i] = initA;
|
||||||
|
b[i] = initB;
|
||||||
|
c[i] = initC;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void STD20Stream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||||
|
{
|
||||||
|
// Element-wise copy.
|
||||||
|
h_a = a;
|
||||||
|
h_b = b;
|
||||||
|
h_c = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void STD20Stream<T>::copy()
|
||||||
|
{
|
||||||
|
std::for_each_n(
|
||||||
|
std::execution::par_unseq,
|
||||||
|
std::views::iota(0).begin(), array_size,
|
||||||
|
[&] (int i) {
|
||||||
|
c[i] = a[i];
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void STD20Stream<T>::mul()
|
||||||
|
{
|
||||||
|
const T scalar = startScalar;
|
||||||
|
|
||||||
|
std::for_each_n(
|
||||||
|
std::execution::par_unseq,
|
||||||
|
std::views::iota(0).begin(), array_size,
|
||||||
|
[&] (int i) {
|
||||||
|
b[i] = scalar * c[i];
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void STD20Stream<T>::add()
|
||||||
|
{
|
||||||
|
std::for_each_n(
|
||||||
|
std::execution::par_unseq,
|
||||||
|
std::views::iota(0).begin(), array_size,
|
||||||
|
[&] (int i) {
|
||||||
|
c[i] = a[i] + b[i];
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void STD20Stream<T>::triad()
|
||||||
|
{
|
||||||
|
const T scalar = startScalar;
|
||||||
|
|
||||||
|
std::for_each_n(
|
||||||
|
std::execution::par_unseq,
|
||||||
|
std::views::iota(0).begin(), array_size,
|
||||||
|
[&] (int i) {
|
||||||
|
a[i] = b[i] + scalar * c[i];
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
T STD20Stream<T>::dot()
|
||||||
|
{
|
||||||
|
// sum += a[i] * b[i];
|
||||||
|
return
|
||||||
|
std::transform_reduce(
|
||||||
|
std::execution::par_unseq,
|
||||||
|
a.begin(), a.end(), b.begin(), 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints a one-line notice to standard output in place of a device list.
void listDevices(void)
{
  // Newline followed by an explicit flush, matching what std::endl does.
  std::cout << "C++20 does not expose devices" << '\n' << std::flush;
}
|
||||||
|
|
||||||
|
// Device name lookup. The index argument is ignored and a fixed placeholder
// string is returned.
std::string getDeviceName(const int)
{
  return "Device name unavailable";
}
|
||||||
|
|
||||||
|
// Device driver lookup. The index argument is ignored and a fixed placeholder
// string is returned.
std::string getDeviceDriver(const int)
{
  return "Device driver unavailable";
}
|
||||||
|
|
||||||
|
// Explicit instantiation: the member functions are defined in this .cpp, so
// the float version is generated here.
template class STD20Stream<float>;
|
||||||
|
// Explicit instantiation: the member functions are defined in this .cpp, so
// the double version is generated here.
template class STD20Stream<double>;
|
||||||
|
|
||||||
42
STD20Stream.hpp
Normal file
42
STD20Stream.hpp
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
// Copyright (c) 2020 Tom Deakin
|
||||||
|
// University of Bristol HPC
|
||||||
|
//
|
||||||
|
// For full license terms please see the LICENSE file distributed with this
|
||||||
|
// source code
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "Stream.h"
|
||||||
|
|
||||||
|
// Name of this implementation as a string literal.
// NOTE(review): presumably read by main.cpp when reporting which backend is
// in use — confirm.
#define IMPLEMENTATION_STRING "C++20"
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
class STD20Stream : public Stream<T>
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
// Size of arrays
|
||||||
|
unsigned int array_size;
|
||||||
|
|
||||||
|
// Device side pointers
|
||||||
|
std::vector<T> a;
|
||||||
|
std::vector<T> b;
|
||||||
|
std::vector<T> c;
|
||||||
|
|
||||||
|
public:
|
||||||
|
STD20Stream(const unsigned int, int);
|
||||||
|
~STD20Stream() = default;
|
||||||
|
|
||||||
|
virtual void copy() override;
|
||||||
|
virtual void add() override;
|
||||||
|
virtual void mul() override;
|
||||||
|
virtual void triad() override;
|
||||||
|
virtual T dot() override;
|
||||||
|
|
||||||
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
10
main.cpp
10
main.cpp
@ -23,6 +23,8 @@
|
|||||||
#include "CUDAStream.h"
|
#include "CUDAStream.h"
|
||||||
#elif defined(STD)
|
#elif defined(STD)
|
||||||
#include "STDStream.h"
|
#include "STDStream.h"
|
||||||
|
#elif defined(STD20)
|
||||||
|
#include "STD20Stream.hpp"
|
||||||
#elif defined(HIP)
|
#elif defined(HIP)
|
||||||
#include "HIPStream.h"
|
#include "HIPStream.h"
|
||||||
#elif defined(HC)
|
#elif defined(HC)
|
||||||
@ -160,6 +162,10 @@ void run()
|
|||||||
// Use the STD implementation
|
// Use the STD implementation
|
||||||
stream = new STDStream<T>(ARRAY_SIZE, deviceIndex);
|
stream = new STDStream<T>(ARRAY_SIZE, deviceIndex);
|
||||||
|
|
||||||
|
#elif defined(STD20)
|
||||||
|
// Use the C++20 implementation
|
||||||
|
stream = new STD20Stream<T>(ARRAY_SIZE, deviceIndex);
|
||||||
|
|
||||||
#elif defined(ACC)
|
#elif defined(ACC)
|
||||||
// Use the OpenACC implementation
|
// Use the OpenACC implementation
|
||||||
stream = new ACCStream<T>(ARRAY_SIZE, deviceIndex);
|
stream = new ACCStream<T>(ARRAY_SIZE, deviceIndex);
|
||||||
@ -368,6 +374,10 @@ void run_triad()
|
|||||||
// Use the STD implementation
|
// Use the STD implementation
|
||||||
stream = new STDStream<T>(ARRAY_SIZE, deviceIndex);
|
stream = new STDStream<T>(ARRAY_SIZE, deviceIndex);
|
||||||
|
|
||||||
|
#elif defined(STD20)
|
||||||
|
// Use the C++20 implementation
|
||||||
|
stream = new STD20Stream<T>(ARRAY_SIZE, deviceIndex);
|
||||||
|
|
||||||
#elif defined(SYCL)
|
#elif defined(SYCL)
|
||||||
// Use the SYCL implementation
|
// Use the SYCL implementation
|
||||||
stream = new SYCLStream<T>(ARRAY_SIZE, deviceIndex);
|
stream = new SYCLStream<T>(ARRAY_SIZE, deviceIndex);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user