Use a compile-time switch (OMP_TARGET_GPU) to select OpenMP directives (target offload or host parallel for)

2016-12-09 11:49:58 +00:00 · 2016-12-09 11:49:58 +00:00 · e6615944f4
commit e6615944f4
parent db01715806
3 changed files with 66 additions and 28 deletions
--- a/OMP45Stream.cpp
+++ b/OMP45Stream.cpp
@@ -5,25 +5,31 @@
 // For full license terms please see the LICENSE file distributed with this
 // source code
-#include "OMP45Stream.h"
+#include "OMPStream.h"
 template <class T>
-OMP45Stream<T>::OMP45Stream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device)
+OMPStream<T>::OMPStream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device)
 {
  omp_set_default_device(device);
  array_size = ARRAY_SIZE;
 #ifdef OMP_TARGET_GPU
  omp_set_default_device(device);
  // Set up data region on device
  this->a = a;
  this->b = b;
  this->c = c;
  #pragma omp target enter data map(alloc: a[0:array_size], b[0:array_size], c[0:array_size])
  {}
 #else
  // Allocate on the host
  this->a = (T*)malloc(sizeof(T)*array_size);
  this->b = (T*)malloc(sizeof(T)*array_size);
  this->c = (T*)malloc(sizeof(T)*array_size);
 #endif
 }
 template <class T>
-OMP45Stream<T>::~OMP45Stream()
+OMPStream<T>::~OMPStream()
 {
  // End data region on device
  unsigned int array_size = this->array_size;
@@ -35,13 +41,17 @@ OMP45Stream<T>::~OMP45Stream()
 }
 template <class T>
-void OMP45Stream<T>::init_arrays(T initA, T initB, T initC)
+void OMPStream<T>::init_arrays(T initA, T initB, T initC)
 {
  unsigned int array_size = this->array_size;
 #ifdef OMP_TARGET_GPU
  T *a = this->a;
  T *b = this->b;
  T *c = this->c;
  #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
 #else
  #pragma omp parallel for
 #endif
  for (int i = 0; i < array_size; i++)
  {
    a[i] = initA;
@@ -51,22 +61,36 @@ void OMP45Stream<T>::init_arrays(T initA, T initB, T initC)
 }
 template <class T>
-void OMP45Stream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
+void OMPStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
 {
 #ifdef OMP_TARGET_GPU
  T *a = this->a;
  T *b = this->b;
  T *c = this->c;
  #pragma omp target update from(a[0:array_size], b[0:array_size], c[0:array_size])
  {}
 #else
  #pragma omp parallel for
  for (int i = 0; i < array_size; i++)
  {
    h_a[i] = a[i];
    h_b[i] = b[i];
    h_c[i] = c[i];
  }
 #endif
 }
 template <class T>
-void OMP45Stream<T>::copy()
+void OMPStream<T>::copy()
 {
 #ifdef OMP_TARGET_GPU
  unsigned int array_size = this->array_size;
  T *a = this->a;
  T *c = this->c;
  #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], c[0:array_size])
 #else
  #pragma omp parallel for
 #endif
  for (int i = 0; i < array_size; i++)
  {
    c[i] = a[i];
@@ -74,14 +98,18 @@ void OMP45Stream<T>::copy()
 }
 template <class T>
-void OMP45Stream<T>::mul()
+void OMPStream<T>::mul()
 {
  const T scalar = startScalar;
 #ifdef OMP_TARGET_GPU
  unsigned int array_size = this->array_size;
  T *b = this->b;
  T *c = this->c;
  #pragma omp target teams distribute parallel for simd map(to: b[0:array_size], c[0:array_size])
 #else
  #pragma omp parallel for
 #endif
  for (int i = 0; i < array_size; i++)
  {
    b[i] = scalar * c[i];
@@ -89,13 +117,17 @@ void OMP45Stream<T>::mul()
 }
 template <class T>
-void OMP45Stream<T>::add()
+void OMPStream<T>::add()
 {
 #ifdef OMP_TARGET_GPU
  unsigned int array_size = this->array_size;
  T *a = this->a;
  T *b = this->b;
  T *c = this->c;
  #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
 #else
  #pragma omp parallel for
 #endif
  for (int i = 0; i < array_size; i++)
  {
    c[i] = a[i] + b[i];
@@ -103,15 +135,19 @@ void OMP45Stream<T>::add()
 }
 template <class T>
-void OMP45Stream<T>::triad()
+void OMPStream<T>::triad()
 {
  const T scalar = startScalar;
 #ifdef OMP_TARGET_GPU
  unsigned int array_size = this->array_size;
  T *a = this->a;
  T *b = this->b;
  T *c = this->c;
  #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
 #else
  #pragma omp parallel for
 #endif
  for (int i = 0; i < array_size; i++)
  {
    a[i] = b[i] + scalar * c[i];
@@ -119,14 +155,18 @@ void OMP45Stream<T>::triad()
 }
 template <class T>
-T OMP45Stream<T>::dot()
+T OMPStream<T>::dot()
 {
  T sum = 0.0;
 #ifdef OMP_TARGET_GPU
  unsigned int array_size = this->array_size;
  T *a = this->a;
  T *b = this->b;
  #pragma omp target teams distribute parallel for simd reduction(+:sum) map(tofrom: sum)
 #else
  #pragma omp parallel for reduction(+:sum)
 #endif
  for (int i = 0; i < array_size; i++)
  {
    sum += a[i] * b[i];
@@ -139,6 +179,7 @@ T OMP45Stream<T>::dot()
 void listDevices(void)
 {
 #ifdef OMP_TARGET_GPU
  // Get number of devices
  int count = omp_get_num_devices();
@@ -151,6 +192,9 @@ void listDevices(void)
  {
    std::cout << "There are " << count << " devices." << std::endl;
  }
 #else
  std::cout << "0: CPU" << std::endl;
 #endif
 }
 std::string getDeviceName(const int)
@@ -162,5 +206,5 @@ std::string getDeviceDriver(const int)
 {
  return std::string("Device driver unavailable");
 }
-template class OMP45Stream<float>;
+template class OMPStream<float>;
-template class OMP45Stream<double>;
+template class OMPStream<double>;
--- a/OMP45Stream.h
+++ b/OMP45Stream.h
@@ -17,7 +17,7 @@
 #define IMPLEMENTATION_STRING "OpenMP 4.5"
 template <class T>
-class OMP45Stream : public Stream<T>
+class OMPStream : public Stream<T>
 {
  protected:
    // Size of arrays
@@ -29,8 +29,8 @@ class OMP45Stream : public Stream<T>
    T *c;
  public:
-    OMP45Stream(const unsigned int, T*, T*, T*, int);
+    OMPStream(const unsigned int, T*, T*, T*, int);
-    ~OMP45Stream();
+    ~OMPStream();
    virtual void copy() override;
    virtual void add() override;
--- a/main.cpp
+++ b/main.cpp
@@ -32,10 +32,8 @@
 #include "ACCStream.h"
 #elif defined(SYCL)
 #include "SYCLStream.h"
-#elif defined(OMP3)
+#elif defined(OMP)
-#include "OMP3Stream.h"
+#include "OMPStream.h"
 #elif defined(OMP45)
 #include "OMP45Stream.h"
 #endif
 // Default size of 2^25
@@ -126,13 +124,9 @@ void run()
  // Use the SYCL implementation
  stream = new SYCLStream<T>(ARRAY_SIZE, deviceIndex);
-#elif defined(OMP3)
+#elif defined(OMP)
-  // Use the "reference" OpenMP 3 implementation
+  // Use the OpenMP implementation
-  stream = new OMP3Stream<T>(ARRAY_SIZE, a.data(), b.data(), c.data());
+  stream = new OMPStream<T>(ARRAY_SIZE, a.data(), b.data(), c.data(), deviceIndex);
 #elif defined(OMP45)
  // Use the "reference" OpenMP 3 implementation
  stream = new OMP45Stream<T>(ARRAY_SIZE, a.data(), b.data(), c.data(), deviceIndex);
 #endif