From e6615944f4e18c1e511653190720f5c10c951045 Mon Sep 17 00:00:00 2001 From: Tom Deakin Date: Fri, 9 Dec 2016 11:49:58 +0000 Subject: [PATCH] Use a compiler switch to select OpenMP directives (target or parallel for) --- OMP45Stream.cpp => OMPStream.cpp | 72 +++++++++++++++++++++++++------- OMP45Stream.h => OMPStream.h | 6 +-- main.cpp | 16 +++---- 3 files changed, 66 insertions(+), 28 deletions(-) rename OMP45Stream.cpp => OMPStream.cpp (71%) rename OMP45Stream.h => OMPStream.h (87%) diff --git a/OMP45Stream.cpp b/OMPStream.cpp similarity index 71% rename from OMP45Stream.cpp rename to OMPStream.cpp index 8ba0434..da51937 100644 --- a/OMP45Stream.cpp +++ b/OMPStream.cpp @@ -5,25 +5,31 @@ // For full license terms please see the LICENSE file distributed with this // source code -#include "OMP45Stream.h" +#include "OMPStream.h" template -OMP45Stream::OMP45Stream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device) +OMPStream::OMPStream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device) { - omp_set_default_device(device); - array_size = ARRAY_SIZE; +#ifdef OMP_TARGET_GPU + omp_set_default_device(device); // Set up data region on device this->a = a; this->b = b; this->c = c; #pragma omp target enter data map(alloc: a[0:array_size], b[0:array_size], c[0:array_size]) {} +#else + // Allocate on the host + this->a = (T*)malloc(sizeof(T)*array_size); + this->b = (T*)malloc(sizeof(T)*array_size); + this->c = (T*)malloc(sizeof(T)*array_size); +#endif } template -OMP45Stream::~OMP45Stream() +OMPStream::~OMPStream() { // End data region on device unsigned int array_size = this->array_size; @@ -35,13 +41,17 @@ OMP45Stream::~OMP45Stream() } template -void OMP45Stream::init_arrays(T initA, T initB, T initC) +void OMPStream::init_arrays(T initA, T initB, T initC) { unsigned int array_size = this->array_size; +#ifdef OMP_TARGET_GPU T *a = this->a; T *b = this->b; T *c = this->c; #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size]) +#else + #pragma omp parallel for +#endif for (int i = 0; i < array_size; i++) { a[i] = initA; @@ -51,22 +61,36 @@ void OMP45Stream::init_arrays(T initA, T initB, T initC) } template -void OMP45Stream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) +void OMPStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) { +#ifdef OMP_TARGET_GPU T *a = this->a; T *b = this->b; T *c = this->c; #pragma omp target update from(a[0:array_size], b[0:array_size], c[0:array_size]) {} +#else + #pragma omp parallel for + for (int i = 0; i < array_size; i++) + { + h_a[i] = a[i]; + h_b[i] = b[i]; + h_c[i] = c[i]; + } +#endif } template -void OMP45Stream::copy() +void OMPStream::copy() { +#ifdef OMP_TARGET_GPU unsigned int array_size = this->array_size; T *a = this->a; T *c = this->c; #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], c[0:array_size]) +#else + #pragma omp parallel for +#endif for (int i = 0; i < array_size; i++) { c[i] = a[i]; @@ -74,14 +98,18 @@ void OMP45Stream::copy() } template -void OMP45Stream::mul() +void OMPStream::mul() { const T scalar = startScalar; +#ifdef OMP_TARGET_GPU unsigned int array_size = this->array_size; T *b = this->b; T *c = this->c; #pragma omp target teams distribute parallel for simd map(to: b[0:array_size], c[0:array_size]) +#else + #pragma omp parallel for +#endif for (int i = 0; i < array_size; i++) { b[i] = scalar * c[i]; @@ -89,13 +117,17 @@ void OMP45Stream::mul() } template -void OMP45Stream::add() +void OMPStream::add() { +#ifdef OMP_TARGET_GPU unsigned int array_size = this->array_size; T *a = this->a; T *b = this->b; T *c = this->c; #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size]) +#else + #pragma omp parallel for +#endif for (int i = 0; i < array_size; i++) { c[i] = a[i] + b[i]; @@ -103,15 +135,19 @@ void OMP45Stream::add() } template -void OMP45Stream::triad() +void OMPStream::triad() { const T scalar = startScalar; +#ifdef OMP_TARGET_GPU unsigned int array_size = this->array_size; T *a = this->a; T *b = this->b; T *c = this->c; #pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size]) +#else + #pragma omp parallel for +#endif for (int i = 0; i < array_size; i++) { a[i] = b[i] + scalar * c[i]; @@ -119,14 +155,18 @@ void OMP45Stream::triad() } template -T OMP45Stream::dot() +T OMPStream::dot() { T sum = 0.0; +#ifdef OMP_TARGET_GPU unsigned int array_size = this->array_size; T *a = this->a; T *b = this->b; #pragma omp target teams distribute parallel for simd reduction(+:sum) map(tofrom: sum) +#else + #pragma omp parallel for reduction(+:sum) +#endif for (int i = 0; i < array_size; i++) { sum += a[i] * b[i]; @@ -139,6 +179,7 @@ T OMP45Stream::dot() void listDevices(void) { +#ifdef OMP_TARGET_GPU // Get number of devices int count = omp_get_num_devices(); @@ -151,6 +192,9 @@ void listDevices(void) { std::cout << "There are " << count << " devices." << std::endl; } +#else + std::cout << "0: CPU" << std::endl; +#endif } std::string getDeviceName(const int) @@ -162,5 +206,5 @@ std::string getDeviceDriver(const int) { return std::string("Device driver unavailable"); } -template class OMP45Stream; -template class OMP45Stream; +template class OMPStream; +template class OMPStream; diff --git a/OMP45Stream.h b/OMPStream.h similarity index 87% rename from OMP45Stream.h rename to OMPStream.h index e99fdeb..08af194 100644 --- a/OMP45Stream.h +++ b/OMPStream.h @@ -17,7 +17,7 @@ #define IMPLEMENTATION_STRING "OpenMP 4.5" template -class OMP45Stream : public Stream +class OMPStream : public Stream { protected: // Size of arrays @@ -29,8 +29,8 @@ class OMP45Stream : public Stream T *c; public: - OMP45Stream(const unsigned int, T*, T*, T*, int); - ~OMP45Stream(); + OMPStream(const unsigned int, T*, T*, T*, int); + ~OMPStream(); virtual void copy() override; virtual void add() override; diff --git a/main.cpp b/main.cpp index 2d80814..c73322f 100644 --- a/main.cpp +++ b/main.cpp @@ -32,10 +32,8 @@ #include "ACCStream.h" #elif defined(SYCL) #include "SYCLStream.h" -#elif defined(OMP3) -#include "OMP3Stream.h" -#elif defined(OMP45) -#include "OMP45Stream.h" +#elif defined(OMP) +#include "OMPStream.h" #endif // Default size of 2^25 @@ -126,13 +124,9 @@ void run() // Use the SYCL implementation stream = new SYCLStream(ARRAY_SIZE, deviceIndex); -#elif defined(OMP3) - // Use the "reference" OpenMP 3 implementation - stream = new OMP3Stream(ARRAY_SIZE, a.data(), b.data(), c.data()); - -#elif defined(OMP45) - // Use the "reference" OpenMP 3 implementation - stream = new OMP45Stream(ARRAY_SIZE, a.data(), b.data(), c.data(), deviceIndex); +#elif defined(OMP) + // Use the OpenMP implementation + stream = new OMPStream(ARRAY_SIZE, a.data(), b.data(), c.data(), deviceIndex); #endif