From 21556af50070722f6c2fc255588c2b0afb99f7ba Mon Sep 17 00:00:00 2001 From: James Price Date: Wed, 26 Oct 2016 15:18:13 +0100 Subject: [PATCH] [OCL] Automatically determine dot NDRange config --- OCLStream.cpp | 21 +++++++++++++++++---- OCLStream.h | 8 ++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/OCLStream.cpp b/OCLStream.cpp index 928421f..199eff5 100644 --- a/OCLStream.cpp +++ b/OCLStream.cpp @@ -90,9 +90,22 @@ OCLStream::OCLStream(const unsigned int ARRAY_SIZE, const int device_index) throw std::runtime_error("Invalid device index"); device = devices[device_index]; + // Determine sensible dot kernel NDRange configuration + if (device.getInfo() & CL_DEVICE_TYPE_CPU) + { + dot_num_groups = device.getInfo(); + dot_wgsize = device.getInfo() * 2; + } + else + { + dot_num_groups = device.getInfo() * 4; + dot_wgsize = device.getInfo(); + } + // Print out device information std::cout << "Using OpenCL device " << getDeviceName(device_index) << std::endl; std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl; + std::cout << "Dot kernel config: " << dot_num_groups << " groups of size " << dot_wgsize << std::endl; context = cl::Context(device); queue = cl::CommandQueue(context); @@ -147,9 +160,9 @@ OCLStream::OCLStream(const unsigned int ARRAY_SIZE, const int device_index) d_a = cl::Buffer(context, CL_MEM_READ_WRITE, sizeof(T) * ARRAY_SIZE); d_b = cl::Buffer(context, CL_MEM_READ_WRITE, sizeof(T) * ARRAY_SIZE); d_c = cl::Buffer(context, CL_MEM_READ_WRITE, sizeof(T) * ARRAY_SIZE); - d_sum = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(T) * DOT_NUM_GROUPS); + d_sum = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(T) * dot_num_groups); - sums = std::vector(DOT_NUM_GROUPS); + sums = std::vector(dot_num_groups); } template @@ -205,8 +218,8 @@ template T OCLStream::dot() { (*dot_kernel)( - cl::EnqueueArgs(queue, cl::NDRange(DOT_NUM_GROUPS*DOT_WGSIZE), cl::NDRange(DOT_WGSIZE)), - d_a, d_b, d_sum, cl::Local(sizeof(T) * DOT_WGSIZE), array_size + cl::EnqueueArgs(queue, cl::NDRange(dot_num_groups*dot_wgsize), cl::NDRange(dot_wgsize)), + d_a, d_b, d_sum, cl::Local(sizeof(T) * dot_wgsize), array_size ); cl::copy(queue, d_sum, sums.begin(), sums.end()); diff --git a/OCLStream.h b/OCLStream.h index 20e5049..ab10a7b 100644 --- a/OCLStream.h +++ b/OCLStream.h @@ -21,10 +21,6 @@ #define IMPLEMENTATION_STRING "OpenCL" -// NDRange configuration for the dot kernel -#define DOT_WGSIZE 256 -#define DOT_NUM_GROUPS 256 - template class OCLStream : public Stream { @@ -52,6 +48,10 @@ class OCLStream : public Stream cl::KernelFunctor *triad_kernel; cl::KernelFunctor *dot_kernel; + // NDRange configuration for the dot kernel + size_t dot_num_groups; + size_t dot_wgsize; + public: OCLStream(const unsigned int, const int);