diff --git a/OMPStream.cpp b/OMPStream.cpp
index 6b2800d..8063987 100644
--- a/OMPStream.cpp
+++ b/OMPStream.cpp
@@ -191,6 +191,35 @@ void OMPStream<T>::triad()
   #endif
 }
 
+// nstream kernel: accumulates b[i] + scalar * c[i] into a[i] for every index
+// in [0, array_size). Uses an OpenMP target region when OMP_TARGET_GPU is
+// defined and a host "parallel for" otherwise.
+template <class T>
+void OMPStream<T>::nstream()
+{
+  const T scalar = startScalar;
+
+#ifdef OMP_TARGET_GPU
+  // Local copies of the members, referenced by the target region below
+  int array_size = this->array_size;
+  T *a = this->a;
+  T *b = this->b;
+  T *c = this->c;
+  #pragma omp target teams distribute parallel for simd
+#else
+  #pragma omp parallel for
+#endif
+  for (int i = 0; i < array_size; i++)
+  {
+    a[i] += b[i] + scalar * c[i];
+  }
+  #if defined(OMP_TARGET_GPU) && defined(_CRAYC)
+  // If using the Cray compiler, the kernels do not block, so this update forces
+  // a small copy to ensure blocking so that timing is correct
+  #pragma omp target update from(a[0:0])
+  #endif
+}
+
 template <class T>
 T OMPStream<T>::dot()
 {
diff --git a/OMPStream.h b/OMPStream.h
index 8c93986..5a5622f 100644
--- a/OMPStream.h
+++ b/OMPStream.h
@@ -36,6 +36,7 @@ class OMPStream : public Stream<T>
     virtual void add() override;
     virtual void mul() override;
     virtual void triad() override;
+    virtual void nstream() override;
     virtual T dot() override;
 
     virtual void init_arrays(T initA, T initB, T initC) override;