From cbf97dc7d989ba88a55126dc735df4258665b9b0 Mon Sep 17 00:00:00 2001 From: James Price Date: Wed, 26 Oct 2016 15:18:31 +0100 Subject: [PATCH] [SYCL] Automatically determine dot NDRange config --- SYCLStream.cpp | 24 +++++++++++++++++------- SYCLStream.h | 4 ++++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/SYCLStream.cpp b/SYCLStream.cpp index e5fd9c6..e78651b 100644 --- a/SYCLStream.cpp +++ b/SYCLStream.cpp @@ -13,9 +13,6 @@ using namespace cl::sycl; #define WGSIZE 256 -#define DOT_WGSIZE 256 -#define DOT_NUM_GROUPS 256 - // Cache list of devices bool cached = false; std::vector devices; @@ -41,9 +38,22 @@ SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) throw std::runtime_error("Invalid device index"); device dev = devices[device_index]; + // Determine sensible dot kernel NDRange configuration + if (dev.is_cpu()) + { + dot_num_groups = dev.get_info(); + dot_wgsize = dev.get_info() * 2; + } + else + { + dot_num_groups = dev.get_info() * 4; + dot_wgsize = dev.get_info(); + } + // Print out device information std::cout << "Using SYCL device " << getDeviceName(device_index) << std::endl; std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl; + std::cout << "Dot kernel config: " << dot_num_groups << " groups of size " << dot_wgsize << std::endl; queue = new cl::sycl::queue(dev); @@ -51,7 +61,7 @@ SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) d_a = new buffer(array_size); d_b = new buffer(array_size); d_c = new buffer(array_size); - d_sum = new buffer(DOT_NUM_GROUPS); + d_sum = new buffer(dot_num_groups); } template @@ -138,11 +148,11 @@ T SYCLStream::dot() auto kb = d_b->template get_access(cgh); auto ksum = d_sum->template get_access(cgh); - auto wg_sum = accessor(range<1>(DOT_WGSIZE), cgh); + auto wg_sum = accessor(range<1>(dot_wgsize), cgh); size_t N = array_size; - cgh.parallel_for(nd_range<1>(DOT_NUM_GROUPS*DOT_WGSIZE, DOT_WGSIZE), [=](nd_item<1> item) + cgh.parallel_for(nd_range<1>(dot_num_groups*dot_wgsize, dot_wgsize), [=](nd_item<1> item) { size_t i = item.get_global(0); size_t li = item.get_local(0); @@ -164,7 +174,7 @@ T SYCLStream::dot() T sum = 0.0; auto h_sum = d_sum->template get_access(); - for (int i = 0; i < DOT_NUM_GROUPS; i++) + for (int i = 0; i < dot_num_groups; i++) { sum += h_sum[i]; } diff --git a/SYCLStream.h b/SYCLStream.h index ce3225e..6f7205b 100644 --- a/SYCLStream.h +++ b/SYCLStream.h @@ -29,6 +29,10 @@ class SYCLStream : public Stream cl::sycl::buffer *d_c; cl::sycl::buffer *d_sum; + // NDRange configuration for the dot kernel + size_t dot_num_groups; + size_t dot_wgsize; + public: SYCLStream(const unsigned int, const int);