parent
d9b089a0f9
commit
e5d54dd521
@ -42,7 +42,7 @@ void ACCStream<T>::init_arrays(T initA, T initB, T initC)
|
|||||||
T * restrict a = this->a;
|
T * restrict a = this->a;
|
||||||
T * restrict b = this->b;
|
T * restrict b = this->b;
|
||||||
T * restrict c = this->c;
|
T * restrict c = this->c;
|
||||||
#pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
|
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
a[i] = initA;
|
a[i] = initA;
|
||||||
@ -67,7 +67,7 @@ void ACCStream<T>::copy()
|
|||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T * restrict a = this->a;
|
T * restrict a = this->a;
|
||||||
T * restrict c = this->c;
|
T * restrict c = this->c;
|
||||||
#pragma acc kernels present(a[0:array_size], c[0:array_size]) wait
|
#pragma acc parallel loop present(a[0:array_size], c[0:array_size]) wait
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
c[i] = a[i];
|
c[i] = a[i];
|
||||||
@ -82,7 +82,7 @@ void ACCStream<T>::mul()
|
|||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T * restrict b = this->b;
|
T * restrict b = this->b;
|
||||||
T * restrict c = this->c;
|
T * restrict c = this->c;
|
||||||
#pragma acc kernels present(b[0:array_size], c[0:array_size]) wait
|
#pragma acc parallel loop present(b[0:array_size], c[0:array_size]) wait
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
b[i] = scalar * c[i];
|
b[i] = scalar * c[i];
|
||||||
@ -96,7 +96,7 @@ void ACCStream<T>::add()
|
|||||||
T * restrict a = this->a;
|
T * restrict a = this->a;
|
||||||
T * restrict b = this->b;
|
T * restrict b = this->b;
|
||||||
T * restrict c = this->c;
|
T * restrict c = this->c;
|
||||||
#pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
|
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
c[i] = a[i] + b[i];
|
c[i] = a[i] + b[i];
|
||||||
@ -112,7 +112,7 @@ void ACCStream<T>::triad()
|
|||||||
T * restrict a = this->a;
|
T * restrict a = this->a;
|
||||||
T * restrict b = this->b;
|
T * restrict b = this->b;
|
||||||
T * restrict c = this->c;
|
T * restrict c = this->c;
|
||||||
#pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
|
#pragma acc parallel loop present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
a[i] = b[i] + scalar * c[i];
|
a[i] = b[i] + scalar * c[i];
|
||||||
@ -127,7 +127,7 @@ T ACCStream<T>::dot()
|
|||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T * restrict a = this->a;
|
T * restrict a = this->a;
|
||||||
T * restrict b = this->b;
|
T * restrict b = this->b;
|
||||||
#pragma acc kernels present(a[0:array_size], b[0:array_size]) wait
|
#pragma acc parallel loop reduction(+:sum) present(a[0:array_size], b[0:array_size]) wait
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
sum += a[i] * b[i];
|
sum += a[i] * b[i];
|
||||||
|
|||||||
@ -13,6 +13,7 @@ All notable changes to this project will be documented in this file.
|
|||||||
- Output formatting of Kokkos implementation.
|
- Output formatting of Kokkos implementation.
|
||||||
- Capitalisation of Kokkos filenames.
|
- Capitalisation of Kokkos filenames.
|
||||||
- Updated HIP implementation to new interface.
|
- Updated HIP implementation to new interface.
|
||||||
|
- Use parallel loop instead of kernels for OpenACC.
|
||||||
|
|
||||||
### Removed
|
### Removed
|
||||||
- Superfluous OpenMP 4.5 map(to:) clauses on kernel target regions.
|
- Superfluous OpenMP 4.5 map(to:) clauses on kernel target regions.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user