diff --git a/ACCStream.cpp b/ACCStream.cpp index 85bf600..8e3bb32 100644 --- a/ACCStream.cpp +++ b/ACCStream.cpp @@ -65,7 +65,7 @@ void ACCStream::copy() template void ACCStream::mul() { - const T scalar = 3.0; + const T scalar = 0.3; unsigned int array_size = this->array_size; T *b = this->b; @@ -94,7 +94,7 @@ void ACCStream::add() template void ACCStream::triad() { - const T scalar = 3.0; + const T scalar = 0.3; unsigned int array_size = this->array_size; T *a = this->a; @@ -133,4 +133,3 @@ std::string getDeviceDriver(const int) } template class ACCStream; template class ACCStream; - diff --git a/CUDAStream.cu b/CUDAStream.cu index 2a28f9c..21d36fa 100644 --- a/CUDAStream.cu +++ b/CUDAStream.cu @@ -118,7 +118,7 @@ void CUDAStream::copy() template __global__ void mul_kernel(T * b, const T * c) { - const T scalar = 3.0; + const T scalar = 0.3; const int i = blockDim.x * blockIdx.x + threadIdx.x; b[i] = scalar * c[i]; } @@ -151,7 +151,7 @@ void CUDAStream::add() template __global__ void triad_kernel(T * a, const T * b, const T * c) { - const T scalar = 3.0; + const T scalar = 0.3; const int i = blockDim.x * blockIdx.x + threadIdx.x; a[i] = b[i] + scalar * c[i]; } diff --git a/KOKKOSStream.cpp b/KOKKOSStream.cpp index d93b6d7..a9bfcd9 100644 --- a/KOKKOSStream.cpp +++ b/KOKKOSStream.cpp @@ -70,7 +70,7 @@ void KOKKOSStream::copy() View b(*d_b); View c(*d_c); - parallel_for(array_size, KOKKOS_LAMBDA (const int index) + parallel_for(array_size, KOKKOS_LAMBDA (const int index) { c[index] = a[index]; }); @@ -84,8 +84,8 @@ void KOKKOSStream::mul() View b(*d_b); View c(*d_c); - const T scalar = 3.0; - parallel_for(array_size, KOKKOS_LAMBDA (const int index) + const T scalar = 0.3; + parallel_for(array_size, KOKKOS_LAMBDA (const int index) { b[index] = scalar*c[index]; }); @@ -99,7 +99,7 @@ void KOKKOSStream::add() View b(*d_b); View c(*d_c); - parallel_for(array_size, KOKKOS_LAMBDA (const int index) + parallel_for(array_size, KOKKOS_LAMBDA (const int index) { c[index] = a[index] + b[index]; }); @@ -114,8 +114,8 @@ void KOKKOSStream::triad() View b(*d_b); View c(*d_c); - const T scalar = 3.0; - parallel_for(array_size, KOKKOS_LAMBDA (const int index) + const T scalar = 0.3; + parallel_for(array_size, KOKKOS_LAMBDA (const int index) { a[index] = b[index] + scalar*c[index]; }); @@ -142,4 +142,3 @@ std::string getDeviceDriver(const int device) //template class KOKKOSStream; template class KOKKOSStream; - diff --git a/OCLStream.cpp b/OCLStream.cpp index f7c538e..50ad543 100644 --- a/OCLStream.cpp +++ b/OCLStream.cpp @@ -14,7 +14,7 @@ void getDeviceList(void); std::string kernels{R"CLC( - constant TYPE scalar = 3.0; + constant TYPE scalar = 0.3; kernel void copy( global const TYPE * restrict a, @@ -253,4 +253,3 @@ std::string getDeviceDriver(const int device) template class OCLStream; template class OCLStream; - diff --git a/OMP3Stream.cpp b/OMP3Stream.cpp index 8899cff..78b0124 100644 --- a/OMP3Stream.cpp +++ b/OMP3Stream.cpp @@ -56,7 +56,7 @@ void OMP3Stream::copy() template void OMP3Stream::mul() { - const T scalar = 3.0; + const T scalar = 0.3; #pragma omp parallel for for (int i = 0; i < array_size; i++) { @@ -77,7 +77,7 @@ void OMP3Stream::add() template void OMP3Stream::triad() { - const T scalar = 3.0; + const T scalar = 0.3; #pragma omp parallel for for (int i = 0; i < array_size; i++) { @@ -103,4 +103,3 @@ std::string getDeviceDriver(const int) template class OMP3Stream; template class OMP3Stream; - diff --git a/OMP45Stream.cpp b/OMP45Stream.cpp index f44f66f..7f6e66a 100644 --- a/OMP45Stream.cpp +++ b/OMP45Stream.cpp @@ -64,7 +64,7 @@ void OMP45Stream::copy() template void OMP45Stream::mul() { - const T scalar = 3.0; + const T scalar = 0.3; unsigned int array_size = this->array_size; T *b = this->b; @@ -93,7 +93,7 @@ void OMP45Stream::add() template void OMP45Stream::triad() { - const T scalar = 3.0; + const T scalar = 0.3; unsigned int array_size = this->array_size; T *a = this->a; @@ -132,4 +132,3 @@ std::string getDeviceDriver(const int) } template class OMP45Stream; template class OMP45Stream; - diff --git a/RAJAStream.cpp b/RAJAStream.cpp index eb98d54..e418f09 100644 --- a/RAJAStream.cpp +++ b/RAJAStream.cpp @@ -66,7 +66,7 @@ void RAJAStream::copy() { T* a = d_a; T* c = d_c; - forall(index_set, [=] RAJA_DEVICE (int index) + forall(index_set, [=] RAJA_DEVICE (int index) { c[index] = a[index]; }); @@ -77,8 +77,8 @@ void RAJAStream::mul() { T* b = d_b; T* c = d_c; - const T scalar = 3.0; - forall(index_set, [=] RAJA_DEVICE (int index) + const T scalar = 0.3; + forall(index_set, [=] RAJA_DEVICE (int index) { b[index] = scalar*c[index]; }); @@ -90,7 +90,7 @@ void RAJAStream::add() T* a = d_a; T* b = d_b; T* c = d_c; - forall(index_set, [=] RAJA_DEVICE (int index) + forall(index_set, [=] RAJA_DEVICE (int index) { c[index] = a[index] + b[index]; }); @@ -102,8 +102,8 @@ void RAJAStream::triad() T* a = d_a; T* b = d_b; T* c = d_c; - const T scalar = 3.0; - forall(index_set, [=] RAJA_DEVICE (int index) + const T scalar = 0.3; + forall(index_set, [=] RAJA_DEVICE (int index) { a[index] = b[index] + scalar*c[index]; }); @@ -128,4 +128,3 @@ std::string getDeviceDriver(const int device) template class RAJAStream; template class RAJAStream; - diff --git a/SYCLStream.cpp b/SYCLStream.cpp index 4f14590..d039d70 100644 --- a/SYCLStream.cpp +++ b/SYCLStream.cpp @@ -78,7 +78,7 @@ void SYCLStream::copy() template void SYCLStream::mul() { - const T scalar = 3.0; + const T scalar = 0.3; queue->submit([&](handler &cgh) { auto kb = d_b->template get_access(cgh); @@ -110,7 +110,7 @@ void SYCLStream::add() template void SYCLStream::triad() { - const T scalar = 3.0; + const T scalar = 0.3; queue->submit([&](handler &cgh) { auto ka = d_a->template get_access(cgh); diff --git a/main.cpp b/main.cpp index 80791d6..933c9a9 100644 --- a/main.cpp +++ b/main.cpp @@ -83,8 +83,8 @@ void run() std::cout << "Precision: double" << std::endl; // Create host vectors - std::vector a(ARRAY_SIZE, 1.0); - std::vector b(ARRAY_SIZE, 2.0); + std::vector a(ARRAY_SIZE, 0.1); + std::vector b(ARRAY_SIZE, 0.2); std::vector c(ARRAY_SIZE, 0.0); std::streamsize ss = std::cout.precision(); std::cout << std::setprecision(1) << std::fixed @@ -216,11 +216,11 @@ template void check_solution(const unsigned int ntimes, std::vector& a, std::vector& b, std::vector& c) { // Generate correct solution - T goldA = 1.0; - T goldB = 2.0; + T goldA = 0.1; + T goldB = 0.2; T goldC = 0.0; - const T scalar = 3.0; + const T scalar = 0.3; for (unsigned int i = 0; i < ntimes; i++) {