Merge remote-tracking branch 'origin/init-arrays' into devel
This commit is contained in:
commit
d42bcd4675
@ -36,13 +36,19 @@ ACCStream<T>::~ACCStream()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void ACCStream<T>::write_arrays(const std::vector<T>& h_a, const std::vector<T>& h_b, const std::vector<T>& h_c)
|
void ACCStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
{
|
{
|
||||||
T *a = this->a;
|
unsigned int array_size = this->array_size;
|
||||||
T *b = this->b;
|
T * restrict a = this->a;
|
||||||
T *c = this->c;
|
T * restrict b = this->b;
|
||||||
#pragma acc update device(a[0:array_size], b[0:array_size], c[0:array_size])
|
T * restrict c = this->c;
|
||||||
{}
|
#pragma acc kernels present(a[0:array_size], b[0:array_size], c[0:array_size]) wait
|
||||||
|
for (int i = 0; i < array_size; i++)
|
||||||
|
{
|
||||||
|
a[i] = initA;
|
||||||
|
b[i] = initB;
|
||||||
|
c[i] = initC;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
|||||||
@ -37,7 +37,7 @@ class ACCStream : public Stream<T>
|
|||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
virtual T dot() override;
|
virtual T dot() override;
|
||||||
|
|
||||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -79,15 +79,22 @@ CUDAStream<T>::~CUDAStream()
|
|||||||
check_error();
|
check_error();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
|
||||||
void CUDAStream<T>::write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c)
|
template <typename T>
|
||||||
|
__global__ void init_kernel(T * a, T * b, T * c, T initA, T initB, T initC)
|
||||||
{
|
{
|
||||||
// Copy host memory to device
|
const int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
cudaMemcpy(d_a, a.data(), a.size()*sizeof(T), cudaMemcpyHostToDevice);
|
a[i] = initA;
|
||||||
|
b[i] = initB;
|
||||||
|
c[i] = initC;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void CUDAStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
|
{
|
||||||
|
init_kernel<<<array_size/TBSIZE, TBSIZE>>>(d_a, d_b, d_c, initA, initB, initC);
|
||||||
check_error();
|
check_error();
|
||||||
cudaMemcpy(d_b, b.data(), b.size()*sizeof(T), cudaMemcpyHostToDevice);
|
cudaDeviceSynchronize();
|
||||||
check_error();
|
|
||||||
cudaMemcpy(d_c, c.data(), c.size()*sizeof(T), cudaMemcpyHostToDevice);
|
|
||||||
check_error();
|
check_error();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -46,7 +46,7 @@ class CUDAStream : public Stream<T>
|
|||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
virtual T dot() override;
|
virtual T dot() override;
|
||||||
|
|
||||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
20
HIPStream.cu
20
HIPStream.cu
@ -74,15 +74,21 @@ HIPStream<T>::~HIPStream()
|
|||||||
check_error();
|
check_error();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <typename T>
|
||||||
void HIPStream<T>::write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c)
|
__global__ void init_kernel(hipLaunchParm lp, T * a, T * b, T * c, T initA, T initB, T initC)
|
||||||
{
|
{
|
||||||
// Copy host memory to device
|
const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||||
hipMemcpy(d_a, a.data(), a.size()*sizeof(T), hipMemcpyHostToDevice);
|
a[i] = initA;
|
||||||
|
b[i] = initB;
|
||||||
|
c[i] = initC;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
void HIPStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
|
{
|
||||||
|
hipLaunchKernel(HIP_KERNEL_NAME(init_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c, initA, initB, initC);
|
||||||
check_error();
|
check_error();
|
||||||
hipMemcpy(d_b, b.data(), b.size()*sizeof(T), hipMemcpyHostToDevice);
|
hipDeviceSynchronize();
|
||||||
check_error();
|
|
||||||
hipMemcpy(d_c, c.data(), c.size()*sizeof(T), hipMemcpyHostToDevice);
|
|
||||||
check_error();
|
check_error();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -37,7 +37,7 @@ class HIPStream : public Stream<T>
|
|||||||
virtual void mul() override;
|
virtual void mul() override;
|
||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
|
|
||||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
@ -34,18 +34,18 @@ KOKKOSStream<T>::~KOKKOSStream()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void KOKKOSStream<T>::write_arrays(
|
void KOKKOSStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c)
|
|
||||||
{
|
{
|
||||||
for(int ii = 0; ii < array_size; ++ii)
|
View<double*, DEVICE> a(*d_a);
|
||||||
|
View<double*, DEVICE> b(*d_b);
|
||||||
|
View<double*, DEVICE> c(*d_c);
|
||||||
|
parallel_for(array_size, KOKKOS_LAMBDA (const int index)
|
||||||
{
|
{
|
||||||
(*hm_a)(ii) = a[ii];
|
a[index] = initA;
|
||||||
(*hm_b)(ii) = b[ii];
|
b[index] - initB;
|
||||||
(*hm_c)(ii) = c[ii];
|
c[index] = initC;
|
||||||
}
|
});
|
||||||
deep_copy(*d_a, *hm_a);
|
Kokkos::fence();
|
||||||
deep_copy(*d_b, *hm_b);
|
|
||||||
deep_copy(*d_c, *hm_c);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
|||||||
@ -49,8 +49,7 @@ class KOKKOSStream : public Stream<T>
|
|||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
virtual T dot() override;
|
virtual T dot() override;
|
||||||
|
|
||||||
virtual void write_arrays(
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
|
||||||
virtual void read_arrays(
|
virtual void read_arrays(
|
||||||
std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -16,6 +16,18 @@ std::string kernels{R"CLC(
|
|||||||
|
|
||||||
constant TYPE scalar = startScalar;
|
constant TYPE scalar = startScalar;
|
||||||
|
|
||||||
|
kernel void init(
|
||||||
|
global TYPE * restrict a,
|
||||||
|
global TYPE * restrict b,
|
||||||
|
global TYPE * restrict c,
|
||||||
|
TYPE initA, TYPE initB, TYPE initC)
|
||||||
|
{
|
||||||
|
const size_t i = get_global_id(0);
|
||||||
|
a[i] = initA;
|
||||||
|
b[i] = initB;
|
||||||
|
c[i] = initC;
|
||||||
|
}
|
||||||
|
|
||||||
kernel void copy(
|
kernel void copy(
|
||||||
global const TYPE * restrict a,
|
global const TYPE * restrict a,
|
||||||
global TYPE * restrict c)
|
global TYPE * restrict c)
|
||||||
@ -140,6 +152,7 @@ OCLStream<T>::OCLStream(const unsigned int ARRAY_SIZE, const int device_index)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create kernels
|
// Create kernels
|
||||||
|
init_kernel = new cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, T, T, T>(program, "init");
|
||||||
copy_kernel = new cl::KernelFunctor<cl::Buffer, cl::Buffer>(program, "copy");
|
copy_kernel = new cl::KernelFunctor<cl::Buffer, cl::Buffer>(program, "copy");
|
||||||
mul_kernel = new cl::KernelFunctor<cl::Buffer, cl::Buffer>(program, "mul");
|
mul_kernel = new cl::KernelFunctor<cl::Buffer, cl::Buffer>(program, "mul");
|
||||||
add_kernel = new cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer>(program, "add");
|
add_kernel = new cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer>(program, "add");
|
||||||
@ -168,6 +181,7 @@ OCLStream<T>::OCLStream(const unsigned int ARRAY_SIZE, const int device_index)
|
|||||||
template <class T>
|
template <class T>
|
||||||
OCLStream<T>::~OCLStream()
|
OCLStream<T>::~OCLStream()
|
||||||
{
|
{
|
||||||
|
delete init_kernel;
|
||||||
delete copy_kernel;
|
delete copy_kernel;
|
||||||
delete mul_kernel;
|
delete mul_kernel;
|
||||||
delete add_kernel;
|
delete add_kernel;
|
||||||
@ -231,11 +245,13 @@ T OCLStream<T>::dot()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void OCLStream<T>::write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c)
|
void OCLStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
{
|
{
|
||||||
cl::copy(queue, a.begin(), a.end(), d_a);
|
(*init_kernel)(
|
||||||
cl::copy(queue, b.begin(), b.end(), d_b);
|
cl::EnqueueArgs(queue, cl::NDRange(array_size)),
|
||||||
cl::copy(queue, c.begin(), c.end(), d_c);
|
d_a, d_b, d_c, initA, initB, initC
|
||||||
|
);
|
||||||
|
queue.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
|||||||
@ -42,6 +42,7 @@ class OCLStream : public Stream<T>
|
|||||||
cl::Context context;
|
cl::Context context;
|
||||||
cl::CommandQueue queue;
|
cl::CommandQueue queue;
|
||||||
|
|
||||||
|
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, T, T, T> *init_kernel;
|
||||||
cl::KernelFunctor<cl::Buffer, cl::Buffer> *copy_kernel;
|
cl::KernelFunctor<cl::Buffer, cl::Buffer> *copy_kernel;
|
||||||
cl::KernelFunctor<cl::Buffer, cl::Buffer> * mul_kernel;
|
cl::KernelFunctor<cl::Buffer, cl::Buffer> * mul_kernel;
|
||||||
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer> *add_kernel;
|
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer> *add_kernel;
|
||||||
@ -63,7 +64,7 @@ class OCLStream : public Stream<T>
|
|||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
virtual T dot() override;
|
virtual T dot() override;
|
||||||
|
|
||||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
@ -26,14 +26,14 @@ OMP3Stream<T>::~OMP3Stream()
|
|||||||
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void OMP3Stream<T>::write_arrays(const std::vector<T>& h_a, const std::vector<T>& h_b, const std::vector<T>& h_c)
|
void OMP3Stream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
{
|
{
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int i = 0; i < array_size; i++)
|
for (int i = 0; i < array_size; i++)
|
||||||
{
|
{
|
||||||
a[i] = h_a[i];
|
a[i] = initA;
|
||||||
b[i] = h_b[i];
|
b[i] = initB;
|
||||||
c[i] = h_c[i];
|
c[i] = initC;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -35,7 +35,7 @@ class OMP3Stream : public Stream<T>
|
|||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
virtual T dot() override;
|
virtual T dot() override;
|
||||||
|
|
||||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
@ -35,13 +35,19 @@ OMP45Stream<T>::~OMP45Stream()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void OMP45Stream<T>::write_arrays(const std::vector<T>& h_a, const std::vector<T>& h_b, const std::vector<T>& h_c)
|
void OMP45Stream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
{
|
{
|
||||||
|
unsigned int array_size = this->array_size;
|
||||||
T *a = this->a;
|
T *a = this->a;
|
||||||
T *b = this->b;
|
T *b = this->b;
|
||||||
T *c = this->c;
|
T *c = this->c;
|
||||||
#pragma omp target update to(a[0:array_size], b[0:array_size], c[0:array_size])
|
#pragma omp target teams distribute parallel for simd map(to: a[0:array_size], b[0:array_size], c[0:array_size])
|
||||||
{}
|
for (int i = 0; i < array_size; i++)
|
||||||
|
{
|
||||||
|
a[i] = initA;
|
||||||
|
b[i] = initB;
|
||||||
|
c[i] = initC;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
|||||||
@ -38,7 +38,7 @@ class OMP45Stream : public Stream<T>
|
|||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
virtual T dot() override;
|
virtual T dot() override;
|
||||||
|
|
||||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -21,12 +21,6 @@ RAJAStream<T>::RAJAStream(const unsigned int ARRAY_SIZE, const int device_index)
|
|||||||
d_a = new T[ARRAY_SIZE];
|
d_a = new T[ARRAY_SIZE];
|
||||||
d_b = new T[ARRAY_SIZE];
|
d_b = new T[ARRAY_SIZE];
|
||||||
d_c = new T[ARRAY_SIZE];
|
d_c = new T[ARRAY_SIZE];
|
||||||
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
|
||||||
{
|
|
||||||
d_a[index] = 0.0;
|
|
||||||
d_b[index] = 0.0;
|
|
||||||
d_c[index] = 0.0;
|
|
||||||
});
|
|
||||||
#else
|
#else
|
||||||
cudaMallocManaged((void**)&d_a, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal);
|
cudaMallocManaged((void**)&d_a, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal);
|
||||||
cudaMallocManaged((void**)&d_b, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal);
|
cudaMallocManaged((void**)&d_b, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal);
|
||||||
@ -50,12 +44,17 @@ RAJAStream<T>::~RAJAStream()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void RAJAStream<T>::write_arrays(
|
void RAJAStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c)
|
|
||||||
{
|
{
|
||||||
std::copy(a.begin(), a.end(), d_a);
|
T* a = d_a;
|
||||||
std::copy(b.begin(), b.end(), d_b);
|
T* b = d_b;
|
||||||
std::copy(c.begin(), c.end(), d_c);
|
T* c = d_c;
|
||||||
|
forall<policy>(index_set, [=] RAJA_DEVICE (int index)
|
||||||
|
{
|
||||||
|
a[index] = initA;
|
||||||
|
b[index] = initB;
|
||||||
|
c[index] = initC;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
|||||||
@ -53,8 +53,7 @@ class RAJAStream : public Stream<T>
|
|||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
virtual T dot() override;
|
virtual T dot() override;
|
||||||
|
|
||||||
virtual void write_arrays(
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
|
||||||
virtual void read_arrays(
|
virtual void read_arrays(
|
||||||
std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -20,6 +20,7 @@ program * p;
|
|||||||
|
|
||||||
/* Forward declaration of SYCL kernels */
|
/* Forward declaration of SYCL kernels */
|
||||||
namespace kernels {
|
namespace kernels {
|
||||||
|
class init;
|
||||||
class copy;
|
class copy;
|
||||||
class mul;
|
class mul;
|
||||||
class add;
|
class add;
|
||||||
@ -60,6 +61,7 @@ SYCLStream<T>::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index)
|
|||||||
|
|
||||||
/* Pre-build the kernels */
|
/* Pre-build the kernels */
|
||||||
p = new program(queue->get_context());
|
p = new program(queue->get_context());
|
||||||
|
p->build_from_kernel_name<kernels::init>();
|
||||||
p->build_from_kernel_name<kernels::copy>();
|
p->build_from_kernel_name<kernels::copy>();
|
||||||
p->build_from_kernel_name<kernels::mul>();
|
p->build_from_kernel_name<kernels::mul>();
|
||||||
p->build_from_kernel_name<kernels::add>();
|
p->build_from_kernel_name<kernels::add>();
|
||||||
@ -201,17 +203,23 @@ T SYCLStream<T>::dot()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void SYCLStream<T>::write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c)
|
void SYCLStream<T>::init_arrays(T initA, T initB, T initC)
|
||||||
{
|
{
|
||||||
auto _a = d_a->template get_access<access::mode::write, access::target::host_buffer>();
|
queue->submit([&](handler &cgh)
|
||||||
auto _b = d_b->template get_access<access::mode::write, access::target::host_buffer>();
|
|
||||||
auto _c = d_c->template get_access<access::mode::write, access::target::host_buffer>();
|
|
||||||
for (int i = 0; i < array_size; i++)
|
|
||||||
{
|
{
|
||||||
_a[i] = a[i];
|
auto ka = d_a->template get_access<access::mode::write>(cgh);
|
||||||
_b[i] = b[i];
|
auto kb = d_b->template get_access<access::mode::write>(cgh);
|
||||||
_c[i] = c[i];
|
auto kc = d_c->template get_access<access::mode::write>(cgh);
|
||||||
}
|
cgh.parallel_for<kernels::init>(p->get_kernel<kernels::init>(),
|
||||||
|
range<1>{array_size}, [=](item<1> item)
|
||||||
|
{
|
||||||
|
auto id = item.get();
|
||||||
|
ka[id[0]] = initA;
|
||||||
|
kb[id[0]] = initB;
|
||||||
|
kc[id[0]] = initC;
|
||||||
|
});
|
||||||
|
});
|
||||||
|
queue->wait();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
|||||||
@ -44,7 +44,7 @@ class SYCLStream : public Stream<T>
|
|||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
virtual T dot() override;
|
virtual T dot() override;
|
||||||
|
|
||||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
2
Stream.h
2
Stream.h
@ -32,7 +32,7 @@ class Stream
|
|||||||
virtual T dot() = 0;
|
virtual T dot() = 0;
|
||||||
|
|
||||||
// Copy memory between host and device
|
// Copy memory between host and device
|
||||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) = 0;
|
virtual void init_arrays(T initA, T initB, T initC) = 0;
|
||||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) = 0;
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) = 0;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
8
main.cpp
8
main.cpp
@ -84,9 +84,9 @@ void run()
|
|||||||
std::cout << "Precision: double" << std::endl;
|
std::cout << "Precision: double" << std::endl;
|
||||||
|
|
||||||
// Create host vectors
|
// Create host vectors
|
||||||
std::vector<T> a(ARRAY_SIZE, startA);
|
std::vector<T> a(ARRAY_SIZE);
|
||||||
std::vector<T> b(ARRAY_SIZE, startB);
|
std::vector<T> b(ARRAY_SIZE);
|
||||||
std::vector<T> c(ARRAY_SIZE, startC);
|
std::vector<T> c(ARRAY_SIZE);
|
||||||
std::streamsize ss = std::cout.precision();
|
std::streamsize ss = std::cout.precision();
|
||||||
std::cout << std::setprecision(1) << std::fixed
|
std::cout << std::setprecision(1) << std::fixed
|
||||||
<< "Array size: " << ARRAY_SIZE*sizeof(T)*1.0E-6 << " MB"
|
<< "Array size: " << ARRAY_SIZE*sizeof(T)*1.0E-6 << " MB"
|
||||||
@ -138,7 +138,7 @@ void run()
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
stream->write_arrays(a, b, c);
|
stream->init_arrays(startA, startB, startC);
|
||||||
|
|
||||||
// List of times
|
// List of times
|
||||||
std::vector<std::vector<double>> timings(5);
|
std::vector<std::vector<double>> timings(5);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user