From e5d54dd5213c3a483d5b4d73d9949e2211be0edc Mon Sep 17 00:00:00 2001 From: Tom Deakin Date: Wed, 25 Jul 2018 15:53:50 +0000 Subject: [PATCH] Use parallel loop for OpenACC instead of kernels Closes #53. --- ACCStream.cpp | 12 ++++++------ CHANGELOG.md | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ACCStream.cpp b/ACCStream.cpp index 0e591a8..4be2a11 100644 --- a/ACCStream.cpp +++ b/ACCStream.cpp @@ -42,7 +42,7 @@ void ACCStream::init_arrays(T initA, T initB, T initC) T * restrict a = this->a; T * restrict b = this->b; T * restrict c = this->c; - #pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait + #pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait for (int i = 0; i < array_size; i++) { a[i] = initA; @@ -67,7 +67,7 @@ void ACCStream::copy() unsigned int array_size = this->array_size; T * restrict a = this->a; T * restrict c = this->c; - #pragma acc kernels present(a[0:array_size], c[0:array_size]) wait + #pragma acc parallel loop present(a[0:array_size], c[0:array_size]) wait for (int i = 0; i < array_size; i++) { c[i] = a[i]; @@ -82,7 +82,7 @@ void ACCStream::mul() unsigned int array_size = this->array_size; T * restrict b = this->b; T * restrict c = this->c; - #pragma acc kernels present(b[0:array_size], c[0:array_size]) wait + #pragma acc parallel loop present(b[0:array_size], c[0:array_size]) wait for (int i = 0; i < array_size; i++) { b[i] = scalar * c[i]; @@ -96,7 +96,7 @@ void ACCStream::add() T * restrict a = this->a; T * restrict b = this->b; T * restrict c = this->c; - #pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait + #pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait for (int i = 0; i < array_size; i++) { c[i] = a[i] + b[i]; @@ -112,7 +112,7 @@ void ACCStream::triad() T * restrict a = this->a; T * restrict b = this->b; T * restrict c = this->c; - #pragma acc kernels 
present(a[0:array_size], b[0:array_size], c[0:array_size]) wait + #pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait for (int i = 0; i < array_size; i++) { a[i] = b[i] + scalar * c[i]; @@ -127,7 +127,7 @@ T ACCStream::dot() unsigned int array_size = this->array_size; T * restrict a = this->a; T * restrict b = this->b; - #pragma acc kernels present(a[0:array_size], b[0:array_size]) wait + #pragma acc parallel loop reduction(+:sum) present(a[0:array_size], b[0:array_size]) wait for (int i = 0; i < array_size; i++) { sum += a[i] * b[i]; diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ef7f7d..81cf7b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ All notable changes to this project will be documented in this file. - Output formatting of Kokkos implementation. - Capitalisation of Kokkos filenames. - Updated HIP implementation to new interface. +- Use parallel loop instead of kernels for OpenACC. ### Removed - Superfluous OpenMP 4.5 map(to:) clauses on kernel target regions.