From 7a3a546a6e208743cedd148429430e2e9337fd32 Mon Sep 17 00:00:00 2001
From: Tom Deakin
Date: Tue, 26 Apr 2016 16:06:17 +0100
Subject: [PATCH] Add mul CUDA kernel

---
 src/CUDAStream.cu | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/CUDAStream.cu b/src/CUDAStream.cu
index 15b1b41..f08c40b 100644
--- a/src/CUDAStream.cu
+++ b/src/CUDAStream.cu
@@ -82,10 +82,20 @@ void CUDAStream<T>::mul()
   check_error();
 }
 
+template <typename T>
+__global__ void add_kernel(const T * a, const T * b, T * c)
+{
+  const int i = blockDim.x * blockIdx.x + threadIdx.x;
+  c[i] = a[i] + b[i];
+}
+
 template <class T>
 void CUDAStream<T>::add()
 {
-  return;
+  add_kernel<<<1024, 1024>>>(d_a, d_b, d_c);
+  check_error();
+  cudaDeviceSynchronize();
+  check_error();
 }
 
 template <class T>