BabelStream/ACCStream.cpp
Tom Deakin 31cb567e21 Switch data from 1.0, 2.0 and 3.0 to 0.1, 0.2, and 0.3 resp.
Using integers for maths gets unstable past 38 iterations even
in double precision. Using the original values/10 is safe up to
the default 100 iterations.
2016-05-11 15:51:19 +01:00

136 lines
2.9 KiB
C++

#include "ACCStream.h"
// Constructor: selects the OpenACC device, records the caller's array
// pointers and element count on the object, and creates device storage for
// the three arrays.
//   ARRAY_SIZE - number of elements in each of a, b and c
//   a, b, c    - array pointers supplied by the caller
//   device     - device number handed to acc_set_device_num
template <class T>
ACCStream<T>::ACCStream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device)
{
  // Device selection comes first so the data directive below targets it.
  acc_set_device_num(device, acc_device_nvidia);

  // Keep the pointers and length for the other member functions.
  this->a = a;
  this->b = b;
  this->c = c;
  this->array_size = ARRAY_SIZE;

  // Start the unstructured device data region: storage is created only,
  // no host data is transferred by this directive.
  #pragma acc enter data create(a[0:ARRAY_SIZE], b[0:ARRAY_SIZE], c[0:ARRAY_SIZE])
}
// Destructor: closes the device data region by deleting the device copies of
// the three arrays.
template <class T>
ACCStream<T>::~ACCStream()
{
  // The directive is given plain locals rather than members of *this.
  T *a = this->a;
  T *b = this->b;
  T *c = this->c;
  const unsigned int n = this->array_size;

  // delete(): drop the device copies; nothing is copied back to the host.
  #pragma acc exit data delete(a[0:n], b[0:n], c[0:n])
}
// Refresh the device copies of a, b and c from host memory.
// The h_a/h_b/h_c arguments are not read here: the update directive is issued
// on the pointers stored in this object.
// NOTE(review): presumably callers pass vectors backed by those same
// pointers - confirm against the call site.
template <class T>
void ACCStream<T>::write_arrays(const std::vector<T>& h_a, const std::vector<T>& h_b, const std::vector<T>& h_c)
{
  const unsigned int n = this->array_size;
  T *a = this->a;
  T *b = this->b;
  T *c = this->c;

  // Host -> device transfer of all three arrays.
  #pragma acc update device(a[0:n], b[0:n], c[0:n])
}
// Refresh the host copies of a, b and c from the device.
// The h_a/h_b/h_c arguments are not written here: the update directive is
// issued on the pointers stored in this object.
// NOTE(review): presumably callers pass vectors backed by those same
// pointers - confirm against the call site.
template <class T>
void ACCStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
{
  const unsigned int n = this->array_size;
  T *a = this->a;
  T *b = this->b;
  T *c = this->c;

  // Device -> host transfer of all three arrays.
  #pragma acc update host(a[0:n], b[0:n], c[0:n])
}
// Copy kernel: c[i] = a[i] for every element, run inside an OpenACC kernels
// region.
template <class T>
void ACCStream<T>::copy()
{
  // Local copies of the members are what the directive and the loop refer to.
  unsigned int array_size = this->array_size;
  T *a = this->a;
  T *c = this->c;

  // present(): the arrays are expected to be on the device already, so no
  // transfer is requested here.
  // The loop index has the same type as array_size: a signed int index would
  // be compared against an unsigned bound and overflow for sizes > INT_MAX.
  #pragma acc kernels present(a[0:array_size], c[0:array_size]) wait
  for (unsigned int i = 0; i < array_size; i++)
  {
    c[i] = a[i];
  }
}
// Mul kernel: b[i] = scalar * c[i] for every element, run inside an OpenACC
// kernels region.
template <class T>
void ACCStream<T>::mul()
{
  // Scaling factor applied to c; converted to T from the double literal.
  const T scalar = 0.3;

  // Local copies of the members are what the directive and the loop refer to.
  unsigned int array_size = this->array_size;
  T *b = this->b;
  T *c = this->c;

  // present(): the arrays are expected to be on the device already, so no
  // transfer is requested here.
  // The loop index has the same type as array_size: a signed int index would
  // be compared against an unsigned bound and overflow for sizes > INT_MAX.
  #pragma acc kernels present(b[0:array_size], c[0:array_size]) wait
  for (unsigned int i = 0; i < array_size; i++)
  {
    b[i] = scalar * c[i];
  }
}
// Add kernel: c[i] = a[i] + b[i] for every element, run inside an OpenACC
// kernels region.
template <class T>
void ACCStream<T>::add()
{
  // Local copies of the members are what the directive and the loop refer to.
  unsigned int array_size = this->array_size;
  T *a = this->a;
  T *b = this->b;
  T *c = this->c;

  // present(): the arrays are expected to be on the device already, so no
  // transfer is requested here.
  // The loop index has the same type as array_size: a signed int index would
  // be compared against an unsigned bound and overflow for sizes > INT_MAX.
  #pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
  for (unsigned int i = 0; i < array_size; i++)
  {
    c[i] = a[i] + b[i];
  }
}
// Triad kernel: a[i] = b[i] + scalar * c[i] for every element, run inside an
// OpenACC kernels region.
template <class T>
void ACCStream<T>::triad()
{
  // Scaling factor applied to c; converted to T from the double literal.
  const T scalar = 0.3;

  // Local copies of the members are what the directive and the loop refer to.
  unsigned int array_size = this->array_size;
  T *a = this->a;
  T *b = this->b;
  T *c = this->c;

  // present(): the arrays are expected to be on the device already, so no
  // transfer is requested here.
  // The loop index has the same type as array_size: a signed int index would
  // be compared against an unsigned bound and overflow for sizes > INT_MAX.
  #pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
  for (unsigned int i = 0; i < array_size; i++)
  {
    a[i] = b[i] + scalar * c[i];
  }
}
// Report how many devices of type acc_device_nvidia the OpenACC runtime
// returns. Only the count is printed; a zero count goes to stderr, any other
// count goes to stdout.
void listDevices(void)
{
  const int num_devices = acc_get_num_devices(acc_device_nvidia);

  if (num_devices != 0)
  {
    std::cout << "There are " << num_devices << " devices." << std::endl;
    return;
  }

  std::cerr << "No devices found." << std::endl;
}
// Device-name query stub: the device index argument is ignored and a fixed
// placeholder string is returned.
std::string getDeviceName(const int)
{
  std::string placeholder("Device name unavailable");
  return placeholder;
}
// Device-driver query stub: the device index argument is ignored and a fixed
// placeholder string is returned.
std::string getDeviceDriver(const int)
{
  std::string placeholder("Device driver unavailable");
  return placeholder;
}
// Explicit instantiations: the member definitions above live in this source
// file, so the float and double versions of ACCStream are generated here.
template class ACCStream<float>;
template class ACCStream<double>;