From 1e976ff1502fb86ab618920c666d2afb7f170034 Mon Sep 17 00:00:00 2001 From: James Price Date: Fri, 18 Nov 2016 00:14:46 +0000 Subject: [PATCH] [SYCL] Fix multiple template specializations --- SYCLStream.cpp | 39 +++++++++++++++------------------------ SYCLStream.h | 18 ++++++++++++++++++ main.cpp | 4 +--- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/SYCLStream.cpp b/SYCLStream.cpp index c8a908c..6160fc1 100644 --- a/SYCLStream.cpp +++ b/SYCLStream.cpp @@ -18,16 +18,6 @@ std::vector devices; void getDeviceList(void); program * p; -/* Forward declaration of SYCL kernels */ -namespace kernels { - class init; - class copy; - class mul; - class add; - class triad; - class dot; -} - template SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) { @@ -61,12 +51,12 @@ SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) /* Pre-build the kernels */ p = new program(queue->get_context()); - p->build_from_kernel_name(); - p->build_from_kernel_name(); - p->build_from_kernel_name(); - p->build_from_kernel_name(); - p->build_from_kernel_name(); - p->build_from_kernel_name(); + p->build_from_kernel_name(); + p->build_from_kernel_name(); + p->build_from_kernel_name(); + p->build_from_kernel_name(); + p->build_from_kernel_name(); + p->build_from_kernel_name(); // Create buffers d_a = new buffer(array_size); @@ -94,7 +84,7 @@ void SYCLStream::copy() { auto ka = d_a->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), + cgh.parallel_for(p->get_kernel(), range<1>{array_size}, [=](item<1> item) { auto id = item.get(); @@ -112,7 +102,7 @@ void SYCLStream::mul() { auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), + cgh.parallel_for(p->get_kernel(), range<1>{array_size}, [=](item<1> item) { auto id = item.get(); @@ -130,7 +120,7 @@ void SYCLStream::add() auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), + cgh.parallel_for(p->get_kernel(), range<1>{array_size}, [=](item<1> item) { auto id = item.get(); @@ -149,7 +139,7 @@ void SYCLStream::triad() auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), + cgh.parallel_for(p->get_kernel(), range<1>{array_size}, [=](item<1> item) { auto id = item.get(); @@ -172,7 +162,8 @@ T SYCLStream::dot() size_t N = array_size; - cgh.parallel_for(nd_range<1>(dot_num_groups*dot_wgsize, dot_wgsize), [=](nd_item<1> item) + cgh.parallel_for(p->get_kernel(), + nd_range<1>(dot_num_groups*dot_wgsize, dot_wgsize), [=](nd_item<1> item) { size_t i = item.get_global(0); size_t li = item.get_local(0); @@ -210,8 +201,8 @@ void SYCLStream::init_arrays(T initA, T initB, T initC) auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), - range<1>{array_size}, [=](item<1> item) + cgh.parallel_for(p->get_kernel(), + range<1>{array_size}, [=](item<1> item) { auto id = item.get(); ka[id[0]] = initA; @@ -311,5 +302,5 @@ std::string getDeviceDriver(const int device) // TODO: Fix kernel names to allow multiple template specializations -//template class SYCLStream; +template class SYCLStream; template class SYCLStream; diff --git a/SYCLStream.h b/SYCLStream.h index f3c8d25..ab62ecd 100644 --- a/SYCLStream.h +++ b/SYCLStream.h @@ -15,6 +15,16 @@ #define IMPLEMENTATION_STRING "SYCL" +namespace sycl_kernels +{ + template class init; + template class copy; + template class mul; + template class add; + template class triad; + template class dot; +} + template class SYCLStream : public Stream { @@ -29,6 +39,14 @@ class SYCLStream : public Stream cl::sycl::buffer *d_c; cl::sycl::buffer *d_sum; + // SYCL kernel names + typedef sycl_kernels::init init_kernel; + typedef sycl_kernels::copy copy_kernel; + typedef sycl_kernels::mul mul_kernel; + typedef sycl_kernels::add add_kernel; + typedef sycl_kernels::triad triad_kernel; + typedef sycl_kernels::dot dot_kernel; + // NDRange configuration for the dot kernel size_t dot_num_groups; size_t dot_wgsize; diff --git a/main.cpp b/main.cpp index 16e3241..2d80814 100644 --- a/main.cpp +++ b/main.cpp @@ -61,13 +61,11 @@ int main(int argc, char *argv[]) parseArguments(argc, argv); - // TODO: Fix SYCL to allow multiple template specializations -#ifndef SYCL + // TODO: Fix Kokkos to allow multiple template specializations #ifndef KOKKOS if (use_float) run(); else -#endif #endif run();