// Patch summary: adds an "nstream" operation to the OpenCL stream backend.
//
// OCLStream.cpp
//   * Kernel source string: new OpenCL kernel `nstream(a, b, c)`. Each
//     work-item takes i = get_global_id(0) and performs
//         a[i] += b[i] + scalar * c[i];
//     i.e. the same right-hand side as the `triad` line shown in the hunk
//     context, but accumulated into `a` instead of overwriting it.
//     `scalar` is not declared inside this hunk; presumably it is defined
//     earlier in the kernel source string -- confirm.
//   * Constructor: creates `nstream_kernel` from `program` under the kernel
//     name "nstream", next to the other kernel functors.
//   * Destructor: deletes `nstream_kernel` alongside the other functors.
//   * New method OCLStream::nstream(): enqueues the kernel over
//     cl::NDRange(array_size) with arguments d_a, d_b, d_c, then calls
//     queue.finish() before returning.
//
// OCLStream.h
//   * Declares the `nstream_kernel` functor pointer member.
//   * Declares `virtual void nstream() override;` -- because of `override`,
//     the base class Stream must already declare a matching virtual nstream().
//
// NOTE(review): this patch text appears to have been flattened onto one line,
// and angle-bracket template text looks stripped (e.g. a bare `template`
// directly before `void OCLStream::nstream()`, and cl::KernelFunctor without
// template arguments). Presumably an extraction artifact -- verify against
// the original commit before applying.
diff --git a/OCLStream.cpp b/OCLStream.cpp index 6c88eda..be88ba9 100644 --- a/OCLStream.cpp +++ b/OCLStream.cpp @@ -61,6 +61,14 @@ std::string kernels{R"CLC( const size_t i = get_global_id(0); a[i] = b[i] + scalar * c[i]; } + kernel void nstream( + global TYPE * restrict a, + global const TYPE * restrict b, + global const TYPE * restrict c) + { + const size_t i = get_global_id(0); + a[i] += b[i] + scalar * c[i]; + } kernel void stream_dot( global const TYPE * restrict a, @@ -157,6 +165,7 @@ OCLStream::OCLStream(const int ARRAY_SIZE, const int device_index) mul_kernel = new cl::KernelFunctor(program, "mul"); add_kernel = new cl::KernelFunctor(program, "add"); triad_kernel = new cl::KernelFunctor(program, "triad"); + nstream_kernel = new cl::KernelFunctor(program, "nstream"); dot_kernel = new cl::KernelFunctor(program, "stream_dot"); array_size = ARRAY_SIZE; @@ -186,6 +195,7 @@ OCLStream::~OCLStream() delete mul_kernel; delete add_kernel; delete triad_kernel; + delete nstream_kernel; delete dot_kernel; devices.clear(); @@ -231,6 +241,16 @@ void OCLStream::triad() queue.finish(); } +template +void OCLStream::nstream() +{ + (*nstream_kernel)( + cl::EnqueueArgs(queue, cl::NDRange(array_size)), + d_a, d_b, d_c + ); + queue.finish(); +} + template T OCLStream::dot() { diff --git a/OCLStream.h b/OCLStream.h index 3085aca..bcdf9ac 100644 --- a/OCLStream.h +++ b/OCLStream.h @@ -47,6 +47,7 @@ class OCLStream : public Stream cl::KernelFunctor * mul_kernel; cl::KernelFunctor *add_kernel; cl::KernelFunctor *triad_kernel; + cl::KernelFunctor *nstream_kernel; cl::KernelFunctor *dot_kernel; // NDRange configuration for the dot kernel @@ -62,6 +63,7 @@ class OCLStream : public Stream virtual void add() override; virtual void mul() override; virtual void triad() override; + virtual void nstream() override; virtual T dot() override; virtual void init_arrays(T initA, T initB, T initC) override;