Use a compile-time switch (OMP_TARGET_GPU) to select OpenMP directives (target or parallel for)

This commit is contained in:
Tom Deakin 2016-12-09 11:49:58 +00:00 committed by James Price
parent db01715806
commit e6615944f4
3 changed files with 66 additions and 28 deletions

View File

@ -5,25 +5,31 @@
// For full license terms please see the LICENSE file distributed with this // For full license terms please see the LICENSE file distributed with this
// source code // source code
#include "OMP45Stream.h" #include "OMPStream.h"
template <class T> template <class T>
OMP45Stream<T>::OMP45Stream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device) OMPStream<T>::OMPStream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device)
{ {
omp_set_default_device(device);
array_size = ARRAY_SIZE; array_size = ARRAY_SIZE;
#ifdef OMP_TARGET_GPU
omp_set_default_device(device);
// Set up data region on device // Set up data region on device
this->a = a; this->a = a;
this->b = b; this->b = b;
this->c = c; this->c = c;
#pragma omp target enter data map(alloc: a[0:array_size], b[0:array_size], c[0:array_size]) #pragma omp target enter data map(alloc: a[0:array_size], b[0:array_size], c[0:array_size])
{} {}
#else
// Allocate on the host
this->a = (T*)malloc(sizeof(T)*array_size);
this->b = (T*)malloc(sizeof(T)*array_size);
this->c = (T*)malloc(sizeof(T)*array_size);
#endif
} }
template <class T> template <class T>
OMP45Stream<T>::~OMP45Stream() OMPStream<T>::~OMPStream()
{ {
// End data region on device // End data region on device
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
@ -35,13 +41,17 @@ OMP45Stream<T>::~OMP45Stream()
} }
template <class T> template <class T>
void OMP45Stream<T>::init_arrays(T initA, T initB, T initC) void OMPStream<T>::init_arrays(T initA, T initB, T initC)
{ {
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
#ifdef OMP_TARGET_GPU
T *a = this->a; T *a = this->a;
T *b = this->b; T *b = this->b;
T *c = this->c; T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size]) #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
#else
#pragma omp parallel for
#endif
for (int i = 0; i < array_size; i++) for (int i = 0; i < array_size; i++)
{ {
a[i] = initA; a[i] = initA;
@ -51,22 +61,36 @@ void OMP45Stream<T>::init_arrays(T initA, T initB, T initC)
} }
template <class T> template <class T>
void OMP45Stream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c) void OMPStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
{ {
#ifdef OMP_TARGET_GPU
T *a = this->a; T *a = this->a;
T *b = this->b; T *b = this->b;
T *c = this->c; T *c = this->c;
#pragma omp target update from(a[0:array_size], b[0:array_size], c[0:array_size]) #pragma omp target update from(a[0:array_size], b[0:array_size], c[0:array_size])
{} {}
#else
#pragma omp parallel for
for (int i = 0; i < array_size; i++)
{
h_a[i] = a[i];
h_b[i] = b[i];
h_c[i] = c[i];
}
#endif
} }
template <class T> template <class T>
void OMP45Stream<T>::copy() void OMPStream<T>::copy()
{ {
#ifdef OMP_TARGET_GPU
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
T *a = this->a; T *a = this->a;
T *c = this->c; T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], c[0:array_size]) #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], c[0:array_size])
#else
#pragma omp parallel for
#endif
for (int i = 0; i < array_size; i++) for (int i = 0; i < array_size; i++)
{ {
c[i] = a[i]; c[i] = a[i];
@ -74,14 +98,18 @@ void OMP45Stream<T>::copy()
} }
template <class T> template <class T>
void OMP45Stream<T>::mul() void OMPStream<T>::mul()
{ {
const T scalar = startScalar; const T scalar = startScalar;
#ifdef OMP_TARGET_GPU
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
T *b = this->b; T *b = this->b;
T *c = this->c; T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: b[0:array_size], c[0:array_size]) #pragma omp target teams distribute parallel for simd map(to: b[0:array_size], c[0:array_size])
#else
#pragma omp parallel for
#endif
for (int i = 0; i < array_size; i++) for (int i = 0; i < array_size; i++)
{ {
b[i] = scalar * c[i]; b[i] = scalar * c[i];
@ -89,13 +117,17 @@ void OMP45Stream<T>::mul()
} }
template <class T> template <class T>
void OMP45Stream<T>::add() void OMPStream<T>::add()
{ {
#ifdef OMP_TARGET_GPU
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
T *a = this->a; T *a = this->a;
T *b = this->b; T *b = this->b;
T *c = this->c; T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size]) #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
#else
#pragma omp parallel for
#endif
for (int i = 0; i < array_size; i++) for (int i = 0; i < array_size; i++)
{ {
c[i] = a[i] + b[i]; c[i] = a[i] + b[i];
@ -103,15 +135,19 @@ void OMP45Stream<T>::add()
} }
template <class T> template <class T>
void OMP45Stream<T>::triad() void OMPStream<T>::triad()
{ {
const T scalar = startScalar; const T scalar = startScalar;
#ifdef OMP_TARGET_GPU
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
T *a = this->a; T *a = this->a;
T *b = this->b; T *b = this->b;
T *c = this->c; T *c = this->c;
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size]) #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
#else
#pragma omp parallel for
#endif
for (int i = 0; i < array_size; i++) for (int i = 0; i < array_size; i++)
{ {
a[i] = b[i] + scalar * c[i]; a[i] = b[i] + scalar * c[i];
@ -119,14 +155,18 @@ void OMP45Stream<T>::triad()
} }
template <class T> template <class T>
T OMP45Stream<T>::dot() T OMPStream<T>::dot()
{ {
T sum = 0.0; T sum = 0.0;
#ifdef OMP_TARGET_GPU
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
T *a = this->a; T *a = this->a;
T *b = this->b; T *b = this->b;
#pragma omp target teams distribute parallel for simd reduction(+:sum) map(tofrom: sum) #pragma omp target teams distribute parallel for simd reduction(+:sum) map(tofrom: sum)
#else
#pragma omp parallel for reduction(+:sum)
#endif
for (int i = 0; i < array_size; i++) for (int i = 0; i < array_size; i++)
{ {
sum += a[i] * b[i]; sum += a[i] * b[i];
@ -139,6 +179,7 @@ T OMP45Stream<T>::dot()
void listDevices(void) void listDevices(void)
{ {
#ifdef OMP_TARGET_GPU
// Get number of devices // Get number of devices
int count = omp_get_num_devices(); int count = omp_get_num_devices();
@ -151,6 +192,9 @@ void listDevices(void)
{ {
std::cout << "There are " << count << " devices." << std::endl; std::cout << "There are " << count << " devices." << std::endl;
} }
#else
std::cout << "0: CPU" << std::endl;
#endif
} }
std::string getDeviceName(const int) std::string getDeviceName(const int)
@ -162,5 +206,5 @@ std::string getDeviceDriver(const int)
{ {
return std::string("Device driver unavailable"); return std::string("Device driver unavailable");
} }
template class OMP45Stream<float>; template class OMPStream<float>;
template class OMP45Stream<double>; template class OMPStream<double>;

View File

@ -17,7 +17,7 @@
#define IMPLEMENTATION_STRING "OpenMP 4.5" #define IMPLEMENTATION_STRING "OpenMP 4.5"
template <class T> template <class T>
class OMP45Stream : public Stream<T> class OMPStream : public Stream<T>
{ {
protected: protected:
// Size of arrays // Size of arrays
@ -29,8 +29,8 @@ class OMP45Stream : public Stream<T>
T *c; T *c;
public: public:
OMP45Stream(const unsigned int, T*, T*, T*, int); OMPStream(const unsigned int, T*, T*, T*, int);
~OMP45Stream(); ~OMPStream();
virtual void copy() override; virtual void copy() override;
virtual void add() override; virtual void add() override;

View File

@ -32,10 +32,8 @@
#include "ACCStream.h" #include "ACCStream.h"
#elif defined(SYCL) #elif defined(SYCL)
#include "SYCLStream.h" #include "SYCLStream.h"
#elif defined(OMP3) #elif defined(OMP)
#include "OMP3Stream.h" #include "OMPStream.h"
#elif defined(OMP45)
#include "OMP45Stream.h"
#endif #endif
// Default size of 2^25 // Default size of 2^25
@ -126,13 +124,9 @@ void run()
// Use the SYCL implementation // Use the SYCL implementation
stream = new SYCLStream<T>(ARRAY_SIZE, deviceIndex); stream = new SYCLStream<T>(ARRAY_SIZE, deviceIndex);
#elif defined(OMP3) #elif defined(OMP)
// Use the "reference" OpenMP 3 implementation // Use the OpenMP implementation
stream = new OMP3Stream<T>(ARRAY_SIZE, a.data(), b.data(), c.data()); stream = new OMPStream<T>(ARRAY_SIZE, a.data(), b.data(), c.data(), deviceIndex);
#elif defined(OMP45)
// Use the "reference" OpenMP 3 implementation
stream = new OMP45Stream<T>(ARRAY_SIZE, a.data(), b.data(), c.data(), deviceIndex);
#endif #endif