Use parallel loop for OpenACC instead of kernels

Closes #53.
This commit is contained in:
Tom Deakin 2018-07-25 15:53:50 +00:00
parent d9b089a0f9
commit e5d54dd521
2 changed files with 7 additions and 6 deletions

View File

@ -42,7 +42,7 @@ void ACCStream<T>::init_arrays(T initA, T initB, T initC)
T * restrict a = this->a;
T * restrict b = this->b;
T * restrict c = this->c;
#pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
for (int i = 0; i < array_size; i++)
{
a[i] = initA;
@ -67,7 +67,7 @@ void ACCStream<T>::copy()
unsigned int array_size = this->array_size;
T * restrict a = this->a;
T * restrict c = this->c;
#pragma acc kernels present(a[0:array_size], c[0:array_size]) wait
#pragma acc parallel loop present(a[0:array_size], c[0:array_size]) wait
for (int i = 0; i < array_size; i++)
{
c[i] = a[i];
@ -82,7 +82,7 @@ void ACCStream<T>::mul()
unsigned int array_size = this->array_size;
T * restrict b = this->b;
T * restrict c = this->c;
#pragma acc kernels present(b[0:array_size], c[0:array_size]) wait
#pragma acc parallel loop present(b[0:array_size], c[0:array_size]) wait
for (int i = 0; i < array_size; i++)
{
b[i] = scalar * c[i];
@ -96,7 +96,7 @@ void ACCStream<T>::add()
T * restrict a = this->a;
T * restrict b = this->b;
T * restrict c = this->c;
#pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
for (int i = 0; i < array_size; i++)
{
c[i] = a[i] + b[i];
@ -112,7 +112,7 @@ void ACCStream<T>::triad()
T * restrict a = this->a;
T * restrict b = this->b;
T * restrict c = this->c;
#pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
for (int i = 0; i < array_size; i++)
{
a[i] = b[i] + scalar * c[i];
@ -127,7 +127,7 @@ T ACCStream<T>::dot()
unsigned int array_size = this->array_size;
T * restrict a = this->a;
T * restrict b = this->b;
#pragma acc kernels present(a[0:array_size], b[0:array_size]) wait
#pragma acc parallel loop reduction(+:sum) present(a[0:array_size], b[0:array_size]) wait
for (int i = 0; i < array_size; i++)
{
sum += a[i] * b[i];

View File

@ -13,6 +13,7 @@ All notable changes to this project will be documented in this file.
- Output formatting of Kokkos implementation.
- Capitalisation of Kokkos filenames.
- Updated HIP implementation to new interface.
- Use parallel loop instead of kernels for OpenACC.
### Removed
- Superfluous OpenMP 4.5 map(to:) clauses on kernel target regions.