Add triad kernel

This commit is contained in:
Tom Deakin 2016-04-26 16:07:32 +01:00
parent 7a3a546a6e
commit 319e11011c

View File

@ -98,10 +98,21 @@ void CUDAStream<T>::add()
check_error();
}
// Triad kernel: every thread handles exactly one element, located at its
// global 1D thread index, and stores b + scalar * c into a (scalar is 3.0).
//
// The kernel performs no bounds check, so the launch must not create more
// threads than a, b and c have elements.
template <typename T>
__global__ void triad_kernel(T * a, const T * b, const T * c)
{
  // Global 1D index of this thread across the whole grid.
  const int gid = blockDim.x * blockIdx.x + threadIdx.x;

  // Fixed multiplier applied to the c operand.
  const T multiplier = 3.0;

  a[gid] = b[gid] + multiplier * c[gid];
}
// Runs the triad operation on the device: launches triad_kernel over d_a, d_b
// and d_c, then blocks the host until the device has finished.
//
// The launch configuration is fixed at 1024 blocks of 1024 threads and the
// kernel has no bounds guard, so this presumably relies on the device arrays
// holding at least 1024 * 1024 elements -- TODO confirm against the allocation.
template <class T>
void CUDAStream<T>::triad()
{
  // Launch directly: nothing may short-circuit before this point, otherwise
  // the operation silently does no work.
  triad_kernel<<<1024, 1024>>>(d_a, d_b, d_c);
  // Kernel launches return no status themselves; check_error() is called here
  // presumably to surface launch-time failures -- verify its implementation.
  check_error();

  // Wait for the kernel to complete before returning to the caller, then check
  // again (presumably for errors raised while the kernel was executing).
  cudaDeviceSynchronize();
  check_error();
}
// Explicit instantiation of CUDAStream for float, so the member function
// templates defined in this file are compiled for that element type.
template class CUDAStream<float>;