Merging in changes from trunk
This commit is contained in:
commit
0a738efa54
136
ACCStream.cpp
Normal file
136
ACCStream.cpp
Normal file
@ -0,0 +1,136 @@
|
||||
|
||||
#include "ACCStream.h"
|
||||
|
||||
template <class T>
|
||||
ACCStream<T>::ACCStream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device)
|
||||
{
|
||||
|
||||
acc_set_device_num(device, acc_device_nvidia);
|
||||
|
||||
array_size = ARRAY_SIZE;
|
||||
|
||||
// Set up data region on device
|
||||
this->a = a;
|
||||
this->b = b;
|
||||
this->c = c;
|
||||
#pragma acc enter data create(a[0:array_size], b[0:array_size], c[0:array_size])
|
||||
{}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
ACCStream<T>::~ACCStream()
|
||||
{
|
||||
// End data region on device
|
||||
unsigned int array_size = this->array_size;
|
||||
T *a = this->a;
|
||||
T *b = this->b;
|
||||
T *c = this->c;
|
||||
#pragma acc exit data delete(a[0:array_size], b[0:array_size], c[0:array_size])
|
||||
{}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void ACCStream<T>::write_arrays(const std::vector<T>& h_a, const std::vector<T>& h_b, const std::vector<T>& h_c)
|
||||
{
|
||||
T *a = this->a;
|
||||
T *b = this->b;
|
||||
T *c = this->c;
|
||||
#pragma acc update device(a[0:array_size], b[0:array_size], c[0:array_size])
|
||||
{}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void ACCStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||
{
|
||||
T *a = this->a;
|
||||
T *b = this->b;
|
||||
T *c = this->c;
|
||||
#pragma acc update host(a[0:array_size], b[0:array_size], c[0:array_size])
|
||||
{}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void ACCStream<T>::copy()
|
||||
{
|
||||
unsigned int array_size = this->array_size;
|
||||
T *a = this->a;
|
||||
T *c = this->c;
|
||||
#pragma acc kernels present(a[0:array_size], c[0:array_size]) wait
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
c[i] = a[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void ACCStream<T>::mul()
|
||||
{
|
||||
const T scalar = 3.0;
|
||||
|
||||
unsigned int array_size = this->array_size;
|
||||
T *b = this->b;
|
||||
T *c = this->c;
|
||||
#pragma acc kernels present(b[0:array_size], c[0:array_size]) wait
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
b[i] = scalar * c[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void ACCStream<T>::add()
|
||||
{
|
||||
unsigned int array_size = this->array_size;
|
||||
T *a = this->a;
|
||||
T *b = this->b;
|
||||
T *c = this->c;
|
||||
#pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void ACCStream<T>::triad()
|
||||
{
|
||||
const T scalar = 3.0;
|
||||
|
||||
unsigned int array_size = this->array_size;
|
||||
T *a = this->a;
|
||||
T *b = this->b;
|
||||
T *c = this->c;
|
||||
#pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
a[i] = b[i] + scalar * c[i];
|
||||
}
|
||||
}
|
||||
void listDevices(void)
|
||||
{
|
||||
// Get number of devices
|
||||
int count = acc_get_num_devices(acc_device_nvidia);
|
||||
|
||||
// Print device list
|
||||
if (count == 0)
|
||||
{
|
||||
std::cerr << "No devices found." << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "There are " << count << " devices." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::string getDeviceName(const int)
|
||||
{
|
||||
return std::string("Device name unavailable");
|
||||
}
|
||||
|
||||
std::string getDeviceDriver(const int)
|
||||
{
|
||||
return std::string("Device driver unavailable");
|
||||
}
|
||||
template class ACCStream<float>;
|
||||
template class ACCStream<double>;
|
||||
|
||||
39
ACCStream.h
Normal file
39
ACCStream.h
Normal file
@ -0,0 +1,39 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "Stream.h"
|
||||
|
||||
#include <openacc.h>
|
||||
|
||||
#define IMPLEMENTATION_STRING "OpenACC"
|
||||
|
||||
template <class T>
|
||||
class ACCStream : public Stream<T>
|
||||
{
|
||||
protected:
|
||||
// Size of arrays
|
||||
unsigned int array_size;
|
||||
// Device side pointers
|
||||
T *a;
|
||||
T *b;
|
||||
T *c;
|
||||
|
||||
public:
|
||||
ACCStream(const unsigned int, T*, T*, T*, int);
|
||||
~ACCStream();
|
||||
|
||||
virtual void copy() override;
|
||||
virtual void add() override;
|
||||
virtual void mul() override;
|
||||
virtual void triad() override;
|
||||
|
||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
@ -9,12 +9,11 @@ project(gpu-stream)
|
||||
set(gpu-stream_VERSION_MAJOR 2)
|
||||
set(gpu-stream_VERSION_MINOR 0)
|
||||
|
||||
list(APPEND CMAKE_CXX_FLAGS --std=c++11)
|
||||
|
||||
configure_file(common.h.in common.h)
|
||||
|
||||
find_package(CUDA 7.0 QUIET)
|
||||
if (${CUDA_FOUND})
|
||||
list(APPEND CUDA_NVCC_FLAGS --std=c++11)
|
||||
cuda_add_executable(gpu-stream-cuda main.cpp CUDAStream.cu)
|
||||
target_compile_definitions(gpu-stream-cuda PUBLIC CUDA)
|
||||
else (${CUDA_FOUND})
|
||||
@ -30,3 +29,20 @@ else (${OpenCL_FOUND})
|
||||
message("Skipping OpenCL...")
|
||||
endif (${OpenCL_FOUND})
|
||||
|
||||
# TODO: Find OpenACC implementations somehow
|
||||
if (true)
|
||||
add_executable(gpu-stream-acc main.cpp ACCStream.cpp)
|
||||
target_compile_definitions(gpu-stream-acc PUBLIC ACC)
|
||||
target_compile_options(gpu-stream-acc PUBLIC "-hstd=c++11")
|
||||
else ()
|
||||
message("Skipping OpenACC...")
|
||||
endif ()
|
||||
|
||||
# TODO: Find SYCL implementations somehow
|
||||
if (true)
|
||||
add_executable(gpu-stream-sycl main.cpp SYCLStream.cpp)
|
||||
target_compile_definitions(gpu-stream-sycl PUBLIC SYCL)
|
||||
set_property(TARGET gpu-stream-sycl PROPERTY CXX_STANDARD 14)
|
||||
else ()
|
||||
message("Skipping SYCL...")
|
||||
endif ()
|
||||
|
||||
106
OMP3Stream.cpp
Normal file
106
OMP3Stream.cpp
Normal file
@ -0,0 +1,106 @@
|
||||
|
||||
#include "OMP3Stream.h"
|
||||
|
||||
template <class T>
|
||||
OMP3Stream<T>::OMP3Stream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c)
|
||||
{
|
||||
array_size = ARRAY_SIZE;
|
||||
this->a = (T*)malloc(sizeof(T)*array_size);
|
||||
this->b = (T*)malloc(sizeof(T)*array_size);
|
||||
this->c = (T*)malloc(sizeof(T)*array_size);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
OMP3Stream<T>::~OMP3Stream()
|
||||
{
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
void OMP3Stream<T>::write_arrays(const std::vector<T>& h_a, const std::vector<T>& h_b, const std::vector<T>& h_c)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
a[i] = h_a[i];
|
||||
b[i] = h_b[i];
|
||||
c[i] = h_c[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void OMP3Stream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
h_a[i] = a[i];
|
||||
h_b[i] = b[i];
|
||||
h_c[i] = c[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void OMP3Stream<T>::copy()
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
c[i] = a[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void OMP3Stream<T>::mul()
|
||||
{
|
||||
const T scalar = 3.0;
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
b[i] = scalar * c[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void OMP3Stream<T>::add()
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void OMP3Stream<T>::triad()
|
||||
{
|
||||
const T scalar = 3.0;
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
a[i] = b[i] + scalar * c[i];
|
||||
}
|
||||
}
|
||||
|
||||
void listDevices(void)
|
||||
{
|
||||
std::cout << "0: CPU" << std::endl;
|
||||
}
|
||||
|
||||
std::string getDeviceName(const int)
|
||||
{
|
||||
return std::string("Device name unavailable");
|
||||
}
|
||||
|
||||
std::string getDeviceDriver(const int)
|
||||
{
|
||||
return std::string("Device driver unavailable");
|
||||
}
|
||||
|
||||
|
||||
template class OMP3Stream<float>;
|
||||
template class OMP3Stream<double>;
|
||||
|
||||
35
OMP3Stream.h
Normal file
35
OMP3Stream.h
Normal file
@ -0,0 +1,35 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "Stream.h"
|
||||
|
||||
#define IMPLEMENTATION_STRING "Reference OpenMP"
|
||||
|
||||
template <class T>
|
||||
class OMP3Stream : public Stream<T>
|
||||
{
|
||||
protected:
|
||||
// Size of arrays
|
||||
unsigned int array_size;
|
||||
// Device side pointers
|
||||
T *a;
|
||||
T *b;
|
||||
T *c;
|
||||
|
||||
public:
|
||||
OMP3Stream(const unsigned int, T*, T*, T*);
|
||||
~OMP3Stream();
|
||||
|
||||
virtual void copy() override;
|
||||
virtual void add() override;
|
||||
virtual void mul() override;
|
||||
virtual void triad() override;
|
||||
|
||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||
|
||||
};
|
||||
|
||||
144
SYCLStream.cpp
Normal file
144
SYCLStream.cpp
Normal file
@ -0,0 +1,144 @@
|
||||
|
||||
// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
|
||||
// University of Bristol HPC
|
||||
//
|
||||
// For full license terms please see the LICENSE file distributed with this
|
||||
// source code
|
||||
|
||||
#include "SYCLStream.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using namespace cl::sycl;
|
||||
|
||||
template <class T>
|
||||
SYCLStream<T>::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index)
|
||||
{
|
||||
array_size = ARRAY_SIZE;
|
||||
|
||||
// Create buffers
|
||||
d_a = buffer<T>(array_size);
|
||||
d_b = buffer<T>(array_size);
|
||||
d_c = buffer<T>(array_size);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
SYCLStream<T>::~SYCLStream()
|
||||
{
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void SYCLStream<T>::copy()
|
||||
{
|
||||
queue.submit([&](handler &cgh)
|
||||
{
|
||||
auto ka = d_a.template get_access<access::read>(cgh);
|
||||
auto kc = d_c.template get_access<access::write>(cgh);
|
||||
cgh.parallel_for(range<1>{array_size}, [=](id<1> index)
|
||||
{
|
||||
kc[index] = ka[index];
|
||||
});
|
||||
});
|
||||
queue.wait();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void SYCLStream<T>::mul()
|
||||
{
|
||||
const T scalar = 3.0;
|
||||
queue.submit([&](handler &cgh)
|
||||
{
|
||||
auto kb = d_b.template get_access<access::write>(cgh);
|
||||
auto kc = d_c.template get_access<access::read>(cgh);
|
||||
cgh.parallel_for(range<1>{array_size}, [=](id<1> index)
|
||||
{
|
||||
kb[index] = scalar * kc[index];
|
||||
});
|
||||
});
|
||||
queue.wait();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void SYCLStream<T>::add()
|
||||
{
|
||||
queue.submit([&](handler &cgh)
|
||||
{
|
||||
auto ka = d_a.template get_access<access::read>(cgh);
|
||||
auto kb = d_b.template get_access<access::read>(cgh);
|
||||
auto kc = d_c.template get_access<access::write>(cgh);
|
||||
cgh.parallel_for(range<1>{array_size}, [=](id<1> index)
|
||||
{
|
||||
kc[index] = ka[index] + kb[index];
|
||||
});
|
||||
});
|
||||
queue.wait();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void SYCLStream<T>::triad()
|
||||
{
|
||||
const T scalar = 3.0;
|
||||
queue.submit([&](handler &cgh)
|
||||
{
|
||||
auto ka = d_a.template get_access<access::write>(cgh);
|
||||
auto kb = d_b.template get_access<access::read>(cgh);
|
||||
auto kc = d_c.template get_access<access::read>(cgh);
|
||||
cgh.parallel_for(range<1>{array_size}, [=](id<1> index){
|
||||
ka[index] = kb[index] + scalar * kc[index];
|
||||
});
|
||||
});
|
||||
queue.wait();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void SYCLStream<T>::write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c)
|
||||
{
|
||||
auto _a = d_a.template get_access<access::write>();
|
||||
auto _b = d_b.template get_access<access::write>();
|
||||
auto _c = d_c.template get_access<access::write>();
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
_a[i] = a[i];
|
||||
_b[i] = b[i];
|
||||
_c[i] = c[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void SYCLStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
|
||||
{
|
||||
auto _a = d_a.template get_access<access::read>();
|
||||
auto _b = d_b.template get_access<access::read>();
|
||||
auto _c = d_c.template get_access<access::read>();
|
||||
for (int i = 0; i < array_size; i++)
|
||||
{
|
||||
a[i] = _a[i];
|
||||
b[i] = _b[i];
|
||||
c[i] = _c[i];
|
||||
}
|
||||
}
|
||||
|
||||
void listDevices(void)
|
||||
{
|
||||
// TODO: Get actual list of devices
|
||||
std::cout << std::endl;
|
||||
std::cout << "Devices:" << std::endl;
|
||||
std::cout << "0: " << "triSYCL" << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
std::string getDeviceName(const int device)
|
||||
{
|
||||
// TODO: Implement properly
|
||||
return "triSYCL";
|
||||
}
|
||||
|
||||
std::string getDeviceDriver(const int device)
|
||||
{
|
||||
// TODO: Implement properly
|
||||
return "triSCYL";
|
||||
}
|
||||
|
||||
|
||||
template class SYCLStream<float>;
|
||||
template class SYCLStream<double>;
|
||||
45
SYCLStream.h
Normal file
45
SYCLStream.h
Normal file
@ -0,0 +1,45 @@
|
||||
|
||||
// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
|
||||
// University of Bristol HPC
|
||||
//
|
||||
// For full license terms please see the LICENSE file distributed with this
|
||||
// source code
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Stream.h"
|
||||
|
||||
#include "CL/sycl.hpp"
|
||||
|
||||
#define IMPLEMENTATION_STRING "SYCL"
|
||||
|
||||
template <class T>
|
||||
class SYCLStream : public Stream<T>
|
||||
{
|
||||
protected:
|
||||
// Size of arrays
|
||||
unsigned int array_size;
|
||||
|
||||
// SYCL objects
|
||||
cl::sycl::queue queue;
|
||||
cl::sycl::buffer<T> d_a;
|
||||
cl::sycl::buffer<T> d_b;
|
||||
cl::sycl::buffer<T> d_c;
|
||||
|
||||
public:
|
||||
|
||||
SYCLStream(const unsigned int, const int);
|
||||
~SYCLStream();
|
||||
|
||||
virtual void copy() override;
|
||||
virtual void add() override;
|
||||
virtual void mul() override;
|
||||
virtual void triad() override;
|
||||
|
||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||
|
||||
};
|
||||
|
||||
// Populate the devices list
|
||||
void getDeviceList(void);
|
||||
19
main.cpp
19
main.cpp
@ -26,6 +26,12 @@
|
||||
#include "RAJAStream.hpp"
|
||||
#elif defined(KOKKOS)
|
||||
#include "KOKKOSStream.hpp"
|
||||
#elif defined(ACC)
|
||||
#include "ACCStream.h"
|
||||
#elif defined(SYCL)
|
||||
#include "SYCLStream.h"
|
||||
#elif defined(OMP3)
|
||||
#include "OMP3Stream.h"
|
||||
#endif
|
||||
|
||||
unsigned int ARRAY_SIZE = 52428800;
|
||||
@ -91,6 +97,18 @@ void run()
|
||||
// Use the Kokkos implementation
|
||||
stream = new KOKKOSStream<T>(ARRAY_SIZE, deviceIndex);
|
||||
|
||||
#elif defined(ACC)
|
||||
// Use the OpenACC implementation
|
||||
stream = new ACCStream<T>(ARRAY_SIZE, a.data(), b.data(), c.data(), deviceIndex);
|
||||
|
||||
#elif defined(SYCL)
|
||||
// Use the SYCL implementation
|
||||
stream = new SYCLStream<T>(ARRAY_SIZE, deviceIndex);
|
||||
|
||||
#elif defined(OMP3)
|
||||
// Use the "reference" OpenMP 3 implementation
|
||||
stream = new OMP3Stream<T>(ARRAY_SIZE, a.data(), b.data(), c.data());
|
||||
|
||||
#endif
|
||||
|
||||
stream->write_arrays(a, b, c);
|
||||
@ -289,4 +307,3 @@ void parseArguments(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user