Switch data from 1.0, 2.0, and 3.0 to 0.1, 0.2, and 0.3, respectively.

Using integer values for the maths gets unstable past 38 iterations, even
in double precision. Using the original values divided by 10 is safe up to
the default 100 iterations.
This commit is contained in:
Tom Deakin 2016-05-11 15:51:19 +01:00
parent 55a858e0c0
commit 31cb567e21
9 changed files with 28 additions and 34 deletions

View File

@ -65,7 +65,7 @@ void ACCStream<T>::copy()
template <class T> template <class T>
void ACCStream<T>::mul() void ACCStream<T>::mul()
{ {
const T scalar = 3.0; const T scalar = 0.3;
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
T *b = this->b; T *b = this->b;
@ -94,7 +94,7 @@ void ACCStream<T>::add()
template <class T> template <class T>
void ACCStream<T>::triad() void ACCStream<T>::triad()
{ {
const T scalar = 3.0; const T scalar = 0.3;
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
T *a = this->a; T *a = this->a;
@ -133,4 +133,3 @@ std::string getDeviceDriver(const int)
} }
template class ACCStream<float>; template class ACCStream<float>;
template class ACCStream<double>; template class ACCStream<double>;

View File

@ -118,7 +118,7 @@ void CUDAStream<T>::copy()
template <typename T> template <typename T>
__global__ void mul_kernel(T * b, const T * c) __global__ void mul_kernel(T * b, const T * c)
{ {
const T scalar = 3.0; const T scalar = 0.3;
const int i = blockDim.x * blockIdx.x + threadIdx.x; const int i = blockDim.x * blockIdx.x + threadIdx.x;
b[i] = scalar * c[i]; b[i] = scalar * c[i];
} }
@ -151,7 +151,7 @@ void CUDAStream<T>::add()
template <typename T> template <typename T>
__global__ void triad_kernel(T * a, const T * b, const T * c) __global__ void triad_kernel(T * a, const T * b, const T * c)
{ {
const T scalar = 3.0; const T scalar = 0.3;
const int i = blockDim.x * blockIdx.x + threadIdx.x; const int i = blockDim.x * blockIdx.x + threadIdx.x;
a[i] = b[i] + scalar * c[i]; a[i] = b[i] + scalar * c[i];
} }

View File

@ -84,7 +84,7 @@ void KOKKOSStream<T>::mul()
View<double*, DEVICE> b(*d_b); View<double*, DEVICE> b(*d_b);
View<double*, DEVICE> c(*d_c); View<double*, DEVICE> c(*d_c);
const T scalar = 3.0; const T scalar = 0.3;
parallel_for(array_size, KOKKOS_LAMBDA (const int index) parallel_for(array_size, KOKKOS_LAMBDA (const int index)
{ {
b[index] = scalar*c[index]; b[index] = scalar*c[index];
@ -114,7 +114,7 @@ void KOKKOSStream<T>::triad()
View<double*, DEVICE> b(*d_b); View<double*, DEVICE> b(*d_b);
View<double*, DEVICE> c(*d_c); View<double*, DEVICE> c(*d_c);
const T scalar = 3.0; const T scalar = 0.3;
parallel_for(array_size, KOKKOS_LAMBDA (const int index) parallel_for(array_size, KOKKOS_LAMBDA (const int index)
{ {
a[index] = b[index] + scalar*c[index]; a[index] = b[index] + scalar*c[index];
@ -142,4 +142,3 @@ std::string getDeviceDriver(const int device)
//template class KOKKOSStream<float>; //template class KOKKOSStream<float>;
template class KOKKOSStream<double>; template class KOKKOSStream<double>;

View File

@ -14,7 +14,7 @@ void getDeviceList(void);
std::string kernels{R"CLC( std::string kernels{R"CLC(
constant TYPE scalar = 3.0; constant TYPE scalar = 0.3;
kernel void copy( kernel void copy(
global const TYPE * restrict a, global const TYPE * restrict a,
@ -253,4 +253,3 @@ std::string getDeviceDriver(const int device)
template class OCLStream<float>; template class OCLStream<float>;
template class OCLStream<double>; template class OCLStream<double>;

View File

@ -56,7 +56,7 @@ void OMP3Stream<T>::copy()
template <class T> template <class T>
void OMP3Stream<T>::mul() void OMP3Stream<T>::mul()
{ {
const T scalar = 3.0; const T scalar = 0.3;
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < array_size; i++) for (int i = 0; i < array_size; i++)
{ {
@ -77,7 +77,7 @@ void OMP3Stream<T>::add()
template <class T> template <class T>
void OMP3Stream<T>::triad() void OMP3Stream<T>::triad()
{ {
const T scalar = 3.0; const T scalar = 0.3;
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < array_size; i++) for (int i = 0; i < array_size; i++)
{ {
@ -103,4 +103,3 @@ std::string getDeviceDriver(const int)
template class OMP3Stream<float>; template class OMP3Stream<float>;
template class OMP3Stream<double>; template class OMP3Stream<double>;

View File

@ -64,7 +64,7 @@ void OMP45Stream<T>::copy()
template <class T> template <class T>
void OMP45Stream<T>::mul() void OMP45Stream<T>::mul()
{ {
const T scalar = 3.0; const T scalar = 0.3;
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
T *b = this->b; T *b = this->b;
@ -93,7 +93,7 @@ void OMP45Stream<T>::add()
template <class T> template <class T>
void OMP45Stream<T>::triad() void OMP45Stream<T>::triad()
{ {
const T scalar = 3.0; const T scalar = 0.3;
unsigned int array_size = this->array_size; unsigned int array_size = this->array_size;
T *a = this->a; T *a = this->a;
@ -132,4 +132,3 @@ std::string getDeviceDriver(const int)
} }
template class OMP45Stream<float>; template class OMP45Stream<float>;
template class OMP45Stream<double>; template class OMP45Stream<double>;

View File

@ -77,7 +77,7 @@ void RAJAStream<T>::mul()
{ {
T* b = d_b; T* b = d_b;
T* c = d_c; T* c = d_c;
const T scalar = 3.0; const T scalar = 0.3;
forall<policy>(index_set, [=] RAJA_DEVICE (int index) forall<policy>(index_set, [=] RAJA_DEVICE (int index)
{ {
b[index] = scalar*c[index]; b[index] = scalar*c[index];
@ -102,7 +102,7 @@ void RAJAStream<T>::triad()
T* a = d_a; T* a = d_a;
T* b = d_b; T* b = d_b;
T* c = d_c; T* c = d_c;
const T scalar = 3.0; const T scalar = 0.3;
forall<policy>(index_set, [=] RAJA_DEVICE (int index) forall<policy>(index_set, [=] RAJA_DEVICE (int index)
{ {
a[index] = b[index] + scalar*c[index]; a[index] = b[index] + scalar*c[index];
@ -128,4 +128,3 @@ std::string getDeviceDriver(const int device)
template class RAJAStream<float>; template class RAJAStream<float>;
template class RAJAStream<double>; template class RAJAStream<double>;

View File

@ -78,7 +78,7 @@ void SYCLStream<T>::copy()
template <class T> template <class T>
void SYCLStream<T>::mul() void SYCLStream<T>::mul()
{ {
const T scalar = 3.0; const T scalar = 0.3;
queue->submit([&](handler &cgh) queue->submit([&](handler &cgh)
{ {
auto kb = d_b->template get_access<access::mode::write>(cgh); auto kb = d_b->template get_access<access::mode::write>(cgh);
@ -110,7 +110,7 @@ void SYCLStream<T>::add()
template <class T> template <class T>
void SYCLStream<T>::triad() void SYCLStream<T>::triad()
{ {
const T scalar = 3.0; const T scalar = 0.3;
queue->submit([&](handler &cgh) queue->submit([&](handler &cgh)
{ {
auto ka = d_a->template get_access<access::mode::write>(cgh); auto ka = d_a->template get_access<access::mode::write>(cgh);

View File

@ -83,8 +83,8 @@ void run()
std::cout << "Precision: double" << std::endl; std::cout << "Precision: double" << std::endl;
// Create host vectors // Create host vectors
std::vector<T> a(ARRAY_SIZE, 1.0); std::vector<T> a(ARRAY_SIZE, 0.1);
std::vector<T> b(ARRAY_SIZE, 2.0); std::vector<T> b(ARRAY_SIZE, 0.2);
std::vector<T> c(ARRAY_SIZE, 0.0); std::vector<T> c(ARRAY_SIZE, 0.0);
std::streamsize ss = std::cout.precision(); std::streamsize ss = std::cout.precision();
std::cout << std::setprecision(1) << std::fixed std::cout << std::setprecision(1) << std::fixed
@ -216,11 +216,11 @@ template <typename T>
void check_solution(const unsigned int ntimes, std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) void check_solution(const unsigned int ntimes, std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
{ {
// Generate correct solution // Generate correct solution
T goldA = 1.0; T goldA = 0.1;
T goldB = 2.0; T goldB = 0.2;
T goldC = 0.0; T goldC = 0.0;
const T scalar = 3.0; const T scalar = 0.3;
for (unsigned int i = 0; i < ntimes; i++) for (unsigned int i = 0; i < ntimes; i++)
{ {