Add CUDA nstream kernel

This commit is contained in:
Tom Deakin 2021-02-02 12:32:33 +00:00
parent fa477bd466
commit 05e3e5a127
2 changed files with 18 additions and 0 deletions

View File

@ -212,6 +212,23 @@ void CUDAStream<T>::triad()
check_error(); check_error();
} }
// Device kernel for the "nstream" operation.
//
// Each thread handles exactly one element: it reads a[idx], b[idx] and c[idx]
// and writes back a[idx] + (b[idx] + alpha * c[idx]), where alpha is taken
// from startScalar (defined outside this block).
//
// Only the x components of blockDim/blockIdx/threadIdx are used to form the
// index. There is no bounds guard in this kernel, so the launch configuration
// must not produce an index past the end of the arrays.
// NOTE(review): presumably the caller guarantees threads == elements — confirm.
template <typename T>
__global__ void nstream_kernel(T * a, const T * b, const T * c)
{
  // Flat global element index for this thread.
  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
  const T alpha = startScalar;
  // Same grouping as the compound assignment: a += (b + alpha * c).
  a[idx] = a[idx] + (b[idx] + alpha * c[idx]);
}
// Host-side entry point for the nstream operation.
//
// Launches nstream_kernel over d_a, d_b and d_c using array_size / TBSIZE
// blocks of TBSIZE threads each, then waits for the device to finish.
// check_error() is invoked once right after the launch and once more after
// the synchronisation.
// NOTE(review): if array_size is not a multiple of TBSIZE the quotient
// presumably truncates and the trailing elements are never touched —
// confirm the size is validated elsewhere.
template <class T>
void CUDAStream<T>::nstream()
{
  // Grid size: one thread per element, TBSIZE threads per block.
  const auto num_blocks = array_size / TBSIZE;

  nstream_kernel<<<num_blocks, TBSIZE>>>(d_a, d_b, d_c);
  check_error();

  // Block the host until the kernel has completed before checking again.
  cudaDeviceSynchronize();
  check_error();
}
template <class T> template <class T>
__global__ void dot_kernel(const T * a, const T * b, T * sum, int array_size) __global__ void dot_kernel(const T * a, const T * b, T * sum, int array_size)
{ {

View File

@ -50,6 +50,7 @@ class CUDAStream : public Stream<T>
virtual void add() override; virtual void add() override;
virtual void mul() override; virtual void mul() override;
virtual void triad() override; virtual void triad() override;
// nstream: for each element, a[i] += b[i] + scalar * c[i].
virtual void nstream() override;
virtual T dot() override; virtual T dot() override;
virtual void init_arrays(T initA, T initB, T initC) override; virtual void init_arrays(T initA, T initB, T initC) override;