Switch data from 1.0, 2.0 and 3.0 to 0.1, 0.2 and 0.3, respectively.
Using the integer values for the maths becomes unstable past 38 iterations, even in double precision. Using the original values divided by 10 is safe up to the default 100 iterations.
This commit is contained in:
parent
55a858e0c0
commit
31cb567e21
@ -65,7 +65,7 @@ void ACCStream<T>::copy()
|
|||||||
template <class T>
|
template <class T>
|
||||||
void ACCStream<T>::mul()
|
void ACCStream<T>::mul()
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
|
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T *b = this->b;
|
T *b = this->b;
|
||||||
@ -94,7 +94,7 @@ void ACCStream<T>::add()
|
|||||||
template <class T>
|
template <class T>
|
||||||
void ACCStream<T>::triad()
|
void ACCStream<T>::triad()
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
|
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T *a = this->a;
|
T *a = this->a;
|
||||||
@ -133,4 +133,3 @@ std::string getDeviceDriver(const int)
|
|||||||
}
|
}
|
||||||
template class ACCStream<float>;
|
template class ACCStream<float>;
|
||||||
template class ACCStream<double>;
|
template class ACCStream<double>;
|
||||||
|
|
||||||
|
|||||||
@ -118,7 +118,7 @@ void CUDAStream<T>::copy()
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void mul_kernel(T * b, const T * c)
|
__global__ void mul_kernel(T * b, const T * c)
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
const int i = blockDim.x * blockIdx.x + threadIdx.x;
|
const int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
b[i] = scalar * c[i];
|
b[i] = scalar * c[i];
|
||||||
}
|
}
|
||||||
@ -151,7 +151,7 @@ void CUDAStream<T>::add()
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void triad_kernel(T * a, const T * b, const T * c)
|
__global__ void triad_kernel(T * a, const T * b, const T * c)
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
const int i = blockDim.x * blockIdx.x + threadIdx.x;
|
const int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
a[i] = b[i] + scalar * c[i];
|
a[i] = b[i] + scalar * c[i];
|
||||||
}
|
}
|
||||||
|
|||||||
@ -70,7 +70,7 @@ void KOKKOSStream<T>::copy()
|
|||||||
View<double*, DEVICE> b(*d_b);
|
View<double*, DEVICE> b(*d_b);
|
||||||
View<double*, DEVICE> c(*d_c);
|
View<double*, DEVICE> c(*d_c);
|
||||||
|
|
||||||
parallel_for(array_size, KOKKOS_LAMBDA (const int index)
|
parallel_for(array_size, KOKKOS_LAMBDA (const int index)
|
||||||
{
|
{
|
||||||
c[index] = a[index];
|
c[index] = a[index];
|
||||||
});
|
});
|
||||||
@ -84,8 +84,8 @@ void KOKKOSStream<T>::mul()
|
|||||||
View<double*, DEVICE> b(*d_b);
|
View<double*, DEVICE> b(*d_b);
|
||||||
View<double*, DEVICE> c(*d_c);
|
View<double*, DEVICE> c(*d_c);
|
||||||
|
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
parallel_for(array_size, KOKKOS_LAMBDA (const int index)
|
parallel_for(array_size, KOKKOS_LAMBDA (const int index)
|
||||||
{
|
{
|
||||||
b[index] = scalar*c[index];
|
b[index] = scalar*c[index];
|
||||||
});
|
});
|
||||||
@ -99,7 +99,7 @@ void KOKKOSStream<T>::add()
|
|||||||
View<double*, DEVICE> b(*d_b);
|
View<double*, DEVICE> b(*d_b);
|
||||||
View<double*, DEVICE> c(*d_c);
|
View<double*, DEVICE> c(*d_c);
|
||||||
|
|
||||||
parallel_for(array_size, KOKKOS_LAMBDA (const int index)
|
parallel_for(array_size, KOKKOS_LAMBDA (const int index)
|
||||||
{
|
{
|
||||||
c[index] = a[index] + b[index];
|
c[index] = a[index] + b[index];
|
||||||
});
|
});
|
||||||
@ -114,8 +114,8 @@ void KOKKOSStream<T>::triad()
|
|||||||
View<double*, DEVICE> b(*d_b);
|
View<double*, DEVICE> b(*d_b);
|
||||||
View<double*, DEVICE> c(*d_c);
|
View<double*, DEVICE> c(*d_c);
|
||||||
|
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
parallel_for(array_size, KOKKOS_LAMBDA (const int index)
|
parallel_for(array_size, KOKKOS_LAMBDA (const int index)
|
||||||
{
|
{
|
||||||
a[index] = b[index] + scalar*c[index];
|
a[index] = b[index] + scalar*c[index];
|
||||||
});
|
});
|
||||||
@ -142,4 +142,3 @@ std::string getDeviceDriver(const int device)
|
|||||||
|
|
||||||
//template class KOKKOSStream<float>;
|
//template class KOKKOSStream<float>;
|
||||||
template class KOKKOSStream<double>;
|
template class KOKKOSStream<double>;
|
||||||
|
|
||||||
|
|||||||
@ -14,7 +14,7 @@ void getDeviceList(void);
|
|||||||
|
|
||||||
std::string kernels{R"CLC(
|
std::string kernels{R"CLC(
|
||||||
|
|
||||||
constant TYPE scalar = 3.0;
|
constant TYPE scalar = 0.3;
|
||||||
|
|
||||||
kernel void copy(
|
kernel void copy(
|
||||||
global const TYPE * restrict a,
|
global const TYPE * restrict a,
|
||||||
@ -253,4 +253,3 @@ std::string getDeviceDriver(const int device)
|
|||||||
|
|
||||||
template class OCLStream<float>;
|
template class OCLStream<float>;
|
||||||
template class OCLStream<double>;
|
template class OCLStream<double>;
|
||||||
|
|
||||||
|
|||||||
@ -56,7 +56,7 @@ void OMP3Stream<T>::copy()
|
|||||||
template <class T>
|
template <class T>
|
||||||
void OMP3Stream<T>::mul()
|
void OMP3Stream<T>::mul()
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
@ -77,7 +77,7 @@ void OMP3Stream<T>::add()
|
|||||||
template <class T>
|
template <class T>
|
||||||
void OMP3Stream<T>::triad()
|
void OMP3Stream<T>::triad()
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
@ -103,4 +103,3 @@ std::string getDeviceDriver(const int)
|
|||||||
|
|
||||||
template class OMP3Stream<float>;
|
template class OMP3Stream<float>;
|
||||||
template class OMP3Stream<double>;
|
template class OMP3Stream<double>;
|
||||||
|
|
||||||
|
|||||||
@ -64,7 +64,7 @@ void OMP45Stream<T>::copy()
|
|||||||
template <class T>
|
template <class T>
|
||||||
void OMP45Stream<T>::mul()
|
void OMP45Stream<T>::mul()
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
|
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T *b = this->b;
|
T *b = this->b;
|
||||||
@ -93,7 +93,7 @@ void OMP45Stream<T>::add()
|
|||||||
template <class T>
|
template <class T>
|
||||||
void OMP45Stream<T>::triad()
|
void OMP45Stream<T>::triad()
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
|
|
||||||
unsigned int array_size = this->array_size;
|
unsigned int array_size = this->array_size;
|
||||||
T *a = this->a;
|
T *a = this->a;
|
||||||
@ -132,4 +132,3 @@ std::string getDeviceDriver(const int)
|
|||||||
}
|
}
|
||||||
template class OMP45Stream<float>;
|
template class OMP45Stream<float>;
|
||||||
template class OMP45Stream<double>;
|
template class OMP45Stream<double>;
|
||||||
|
|
||||||
|
|||||||
@ -66,7 +66,7 @@ void RAJAStream<T>::copy()
|
|||||||
{
|
{
|
||||||
T* a = d_a;
|
T* a = d_a;
|
||||||
T* c = d_c;
|
T* c = d_c;
|
||||||
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
||||||
{
|
{
|
||||||
c[index] = a[index];
|
c[index] = a[index];
|
||||||
});
|
});
|
||||||
@ -77,8 +77,8 @@ void RAJAStream<T>::mul()
|
|||||||
{
|
{
|
||||||
T* b = d_b;
|
T* b = d_b;
|
||||||
T* c = d_c;
|
T* c = d_c;
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
||||||
{
|
{
|
||||||
b[index] = scalar*c[index];
|
b[index] = scalar*c[index];
|
||||||
});
|
});
|
||||||
@ -90,7 +90,7 @@ void RAJAStream<T>::add()
|
|||||||
T* a = d_a;
|
T* a = d_a;
|
||||||
T* b = d_b;
|
T* b = d_b;
|
||||||
T* c = d_c;
|
T* c = d_c;
|
||||||
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
||||||
{
|
{
|
||||||
c[index] = a[index] + b[index];
|
c[index] = a[index] + b[index];
|
||||||
});
|
});
|
||||||
@ -102,8 +102,8 @@ void RAJAStream<T>::triad()
|
|||||||
T* a = d_a;
|
T* a = d_a;
|
||||||
T* b = d_b;
|
T* b = d_b;
|
||||||
T* c = d_c;
|
T* c = d_c;
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
||||||
{
|
{
|
||||||
a[index] = b[index] + scalar*c[index];
|
a[index] = b[index] + scalar*c[index];
|
||||||
});
|
});
|
||||||
@ -128,4 +128,3 @@ std::string getDeviceDriver(const int device)
|
|||||||
|
|
||||||
template class RAJAStream<float>;
|
template class RAJAStream<float>;
|
||||||
template class RAJAStream<double>;
|
template class RAJAStream<double>;
|
||||||
|
|
||||||
|
|||||||
@ -78,7 +78,7 @@ void SYCLStream<T>::copy()
|
|||||||
template <class T>
|
template <class T>
|
||||||
void SYCLStream<T>::mul()
|
void SYCLStream<T>::mul()
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
queue->submit([&](handler &cgh)
|
queue->submit([&](handler &cgh)
|
||||||
{
|
{
|
||||||
auto kb = d_b->template get_access<access::mode::write>(cgh);
|
auto kb = d_b->template get_access<access::mode::write>(cgh);
|
||||||
@ -110,7 +110,7 @@ void SYCLStream<T>::add()
|
|||||||
template <class T>
|
template <class T>
|
||||||
void SYCLStream<T>::triad()
|
void SYCLStream<T>::triad()
|
||||||
{
|
{
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
queue->submit([&](handler &cgh)
|
queue->submit([&](handler &cgh)
|
||||||
{
|
{
|
||||||
auto ka = d_a->template get_access<access::mode::write>(cgh);
|
auto ka = d_a->template get_access<access::mode::write>(cgh);
|
||||||
|
|||||||
10
main.cpp
10
main.cpp
@ -83,8 +83,8 @@ void run()
|
|||||||
std::cout << "Precision: double" << std::endl;
|
std::cout << "Precision: double" << std::endl;
|
||||||
|
|
||||||
// Create host vectors
|
// Create host vectors
|
||||||
std::vector<T> a(ARRAY_SIZE, 1.0);
|
std::vector<T> a(ARRAY_SIZE, 0.1);
|
||||||
std::vector<T> b(ARRAY_SIZE, 2.0);
|
std::vector<T> b(ARRAY_SIZE, 0.2);
|
||||||
std::vector<T> c(ARRAY_SIZE, 0.0);
|
std::vector<T> c(ARRAY_SIZE, 0.0);
|
||||||
std::streamsize ss = std::cout.precision();
|
std::streamsize ss = std::cout.precision();
|
||||||
std::cout << std::setprecision(1) << std::fixed
|
std::cout << std::setprecision(1) << std::fixed
|
||||||
@ -216,11 +216,11 @@ template <typename T>
|
|||||||
void check_solution(const unsigned int ntimes, std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
|
void check_solution(const unsigned int ntimes, std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
|
||||||
{
|
{
|
||||||
// Generate correct solution
|
// Generate correct solution
|
||||||
T goldA = 1.0;
|
T goldA = 0.1;
|
||||||
T goldB = 2.0;
|
T goldB = 0.2;
|
||||||
T goldC = 0.0;
|
T goldC = 0.0;
|
||||||
|
|
||||||
const T scalar = 3.0;
|
const T scalar = 0.3;
|
||||||
|
|
||||||
for (unsigned int i = 0; i < ntimes; i++)
|
for (unsigned int i = 0; i < ntimes; i++)
|
||||||
{
|
{
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user