Use a compiler switch to select OpenMP directives (target or parallel for)
This commit is contained in:
parent
db01715806
commit
e6615944f4
@ -5,25 +5,31 @@
|
|||||||
// For full license terms please see the LICENSE file distributed with this
|
// For full license terms please see the LICENSE file distributed with this
|
||||||
// source code
|
// source code
|
||||||
|
|
||||||
#include "OMP45Stream.h"
|
#include "OMPStream.h"
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
OMP45Stream<T>::OMP45Stream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device)
|
OMPStream<T>::OMPStream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device)
|
||||||
{
|
{
|
||||||
omp_set_default_device(device);
|
|
||||||
|
|
||||||
array_size = ARRAY_SIZE;
|
array_size = ARRAY_SIZE;
|
||||||
|
|
||||||
|
#ifdef OMP_TARGET_GPU
|
||||||
|
omp_set_default_device(device);
|
||||||
// Set up data region on device
|
// Set up data region on device
|
||||||
this->a = a;
|
this->a = a;
|
||||||
this->b = b;
|
this->b = b;
|
||||||
this->c = c;
|
this->c = c;
|
||||||
#pragma omp target enter data map(alloc: a[0:array_size], b[0:array_size], c[0:array_size])
|
#pragma omp target enter data map(alloc: a[0:array_size], b[0:array_size], c[0:array_size])
|
||||||
{}
|
{}
|
||||||
|
#else
|
||||||
|
// Allocate on the host
|
||||||
|
this->a = (T*)malloc(sizeof(T)*array_size);
|
||||||
|
this->b = (T*)malloc(sizeof(T)*array_size);
|
||||||
|
this->c = (T*)malloc(sizeof(T)*array_size);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
OMP45Stream<T>::~OMP45Stream()
|
OMPStream<T>::~OMPStream()
|
||||||
{
|
{
|
||||||
// End data region on device
|
// End data region on device
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
@ -35,13 +41,17 @@ OMP45Stream<T>::~OMP45Stream()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void OMP45Stream<T>::init_arrays(T initA, T initB, T initC)
|
void OMPStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
{
|
{
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
|
#ifdef OMP_TARGET_GPU
|
||||||
T *a = this->a;
|
T *a = this->a;
|
||||||
T *b = this->b;
|
T *b = this->b;
|
||||||
T *c = this->c;
|
T *c = this->c;
|
||||||
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
|
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
|
||||||
|
#else
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
a[i] = initA;
|
a[i] = initA;
|
||||||
@ -51,22 +61,36 @@ void OMP45Stream<T>::init_arrays(T initA, T initB, T initC)
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void OMP45Stream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
void OMPStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
|
||||||
{
|
{
|
||||||
|
#ifdef OMP_TARGET_GPU
|
||||||
T *a = this->a;
|
T *a = this->a;
|
||||||
T *b = this->b;
|
T *b = this->b;
|
||||||
T *c = this->c;
|
T *c = this->c;
|
||||||
#pragma omp target update from(a[0:array_size], b[0:array_size], c[0:array_size])
|
#pragma omp target update from(a[0:array_size], b[0:array_size], c[0:array_size])
|
||||||
{}
|
{}
|
||||||
|
#else
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int i = 0; i < array_size; i++)
|
||||||
|
{
|
||||||
|
h_a[i] = a[i];
|
||||||
|
h_b[i] = b[i];
|
||||||
|
h_c[i] = c[i];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void OMP45Stream<T>::copy()
|
void OMPStream<T>::copy()
|
||||||
{
|
{
|
||||||
|
#ifdef OMP_TARGET_GPU
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T *a = this->a;
|
T *a = this->a;
|
||||||
T *c = this->c;
|
T *c = this->c;
|
||||||
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], c[0:array_size])
|
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], c[0:array_size])
|
||||||
|
#else
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
c[i] = a[i];
|
c[i] = a[i];
|
||||||
@ -74,14 +98,18 @@ void OMP45Stream<T>::copy()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void OMP45Stream<T>::mul()
|
void OMPStream<T>::mul()
|
||||||
{
|
{
|
||||||
const T scalar = startScalar;
|
const T scalar = startScalar;
|
||||||
|
|
||||||
|
#ifdef OMP_TARGET_GPU
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T *b = this->b;
|
T *b = this->b;
|
||||||
T *c = this->c;
|
T *c = this->c;
|
||||||
#pragma omp target teams distribute parallel for simd map(to: b[0:array_size], c[0:array_size])
|
#pragma omp target teams distribute parallel for simd map(to: b[0:array_size], c[0:array_size])
|
||||||
|
#else
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
b[i] = scalar * c[i];
|
b[i] = scalar * c[i];
|
||||||
@ -89,13 +117,17 @@ void OMP45Stream<T>::mul()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void OMP45Stream<T>::add()
|
void OMPStream<T>::add()
|
||||||
{
|
{
|
||||||
|
#ifdef OMP_TARGET_GPU
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T *a = this->a;
|
T *a = this->a;
|
||||||
T *b = this->b;
|
T *b = this->b;
|
||||||
T *c = this->c;
|
T *c = this->c;
|
||||||
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
|
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
|
||||||
|
#else
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
c[i] = a[i] + b[i];
|
c[i] = a[i] + b[i];
|
||||||
@ -103,15 +135,19 @@ void OMP45Stream<T>::add()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void OMP45Stream<T>::triad()
|
void OMPStream<T>::triad()
|
||||||
{
|
{
|
||||||
const T scalar = startScalar;
|
const T scalar = startScalar;
|
||||||
|
|
||||||
|
#ifdef OMP_TARGET_GPU
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T *a = this->a;
|
T *a = this->a;
|
||||||
T *b = this->b;
|
T *b = this->b;
|
||||||
T *c = this->c;
|
T *c = this->c;
|
||||||
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
|
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
|
||||||
|
#else
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
a[i] = b[i] + scalar * c[i];
|
a[i] = b[i] + scalar * c[i];
|
||||||
@ -119,14 +155,18 @@ void OMP45Stream<T>::triad()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
T OMP45Stream<T>::dot()
|
T OMPStream<T>::dot()
|
||||||
{
|
{
|
||||||
T sum = 0.0;
|
T sum = 0.0;
|
||||||
|
|
||||||
|
#ifdef OMP_TARGET_GPU
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T *a = this->a;
|
T *a = this->a;
|
||||||
T *b = this->b;
|
T *b = this->b;
|
||||||
#pragma omp target teams distribute parallel for simd reduction(+:sum) map(tofrom: sum)
|
#pragma omp target teams distribute parallel for simd reduction(+:sum) map(tofrom: sum)
|
||||||
|
#else
|
||||||
|
#pragma omp parallel for reduction(+:sum)
|
||||||
|
#endif
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
sum += a[i] * b[i];
|
sum += a[i] * b[i];
|
||||||
@ -139,6 +179,7 @@ T OMP45Stream<T>::dot()
|
|||||||
|
|
||||||
void listDevices(void)
|
void listDevices(void)
|
||||||
{
|
{
|
||||||
|
#ifdef OMP_TARGET_GPU
|
||||||
// Get number of devices
|
// Get number of devices
|
||||||
int count = omp_get_num_devices();
|
int count = omp_get_num_devices();
|
||||||
|
|
||||||
@ -151,6 +192,9 @@ void listDevices(void)
|
|||||||
{
|
{
|
||||||
std::cout << "There are " << count << " devices." << std::endl;
|
std::cout << "There are " << count << " devices." << std::endl;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
std::cout << "0: CPU" << std::endl;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string getDeviceName(const int)
|
std::string getDeviceName(const int)
|
||||||
@ -162,5 +206,5 @@ std::string getDeviceDriver(const int)
|
|||||||
{
|
{
|
||||||
return std::string("Device driver unavailable");
|
return std::string("Device driver unavailable");
|
||||||
}
|
}
|
||||||
template class OMP45Stream<float>;
|
template class OMPStream<float>;
|
||||||
template class OMP45Stream<double>;
|
template class OMPStream<double>;
|
||||||
@ -17,7 +17,7 @@
|
|||||||
#define IMPLEMENTATION_STRING "OpenMP 4.5"
|
#define IMPLEMENTATION_STRING "OpenMP 4.5"
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
class OMP45Stream : public Stream<T>
|
class OMPStream : public Stream<T>
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
// Size of arrays
|
// Size of arrays
|
||||||
@ -29,8 +29,8 @@ class OMP45Stream : public Stream<T>
|
|||||||
T *c;
|
T *c;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
OMP45Stream(const unsigned int, T*, T*, T*, int);
|
OMPStream(const unsigned int, T*, T*, T*, int);
|
||||||
~OMP45Stream();
|
~OMPStream();
|
||||||
|
|
||||||
virtual void copy() override;
|
virtual void copy() override;
|
||||||
virtual void add() override;
|
virtual void add() override;
|
||||||
16
main.cpp
16
main.cpp
@ -32,10 +32,8 @@
|
|||||||
#include "ACCStream.h"
|
#include "ACCStream.h"
|
||||||
#elif defined(SYCL)
|
#elif defined(SYCL)
|
||||||
#include "SYCLStream.h"
|
#include "SYCLStream.h"
|
||||||
#elif defined(OMP3)
|
#elif defined(OMP)
|
||||||
#include "OMP3Stream.h"
|
#include "OMPStream.h"
|
||||||
#elif defined(OMP45)
|
|
||||||
#include "OMP45Stream.h"
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Default size of 2^25
|
// Default size of 2^25
|
||||||
@ -126,13 +124,9 @@ void run()
|
|||||||
// Use the SYCL implementation
|
// Use the SYCL implementation
|
||||||
stream = new SYCLStream<T>(ARRAY_SIZE, deviceIndex);
|
stream = new SYCLStream<T>(ARRAY_SIZE, deviceIndex);
|
||||||
|
|
||||||
#elif defined(OMP3)
|
#elif defined(OMP)
|
||||||
// Use the "reference" OpenMP 3 implementation
|
// Use the OpenMP implementation
|
||||||
stream = new OMP3Stream<T>(ARRAY_SIZE, a.data(), b.data(), c.data());
|
stream = new OMPStream<T>(ARRAY_SIZE, a.data(), b.data(), c.data(), deviceIndex);
|
||||||
|
|
||||||
#elif defined(OMP45)
|
|
||||||
// Use the "reference" OpenMP 3 implementation
|
|
||||||
stream = new OMP45Stream<T>(ARRAY_SIZE, a.data(), b.data(), c.data(), deviceIndex);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user