diff --git a/SYCLStream.cpp b/SYCLStream.cpp index 45cf477..995eb1b 100644 --- a/SYCLStream.cpp +++ b/SYCLStream.cpp @@ -16,6 +16,9 @@ using namespace cl::sycl; bool cached = false; std::vector devices; void getDeviceList(void); +#ifdef COMPUTECPP_CE +program * p; +#endif template SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) @@ -66,6 +69,17 @@ SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) throw std::runtime_error("SYCL errors detected"); } }); + + #ifdef COMPUTECPP_CE + /* Pre-build the kernels */ + p = new program(queue->get_context()); + p->build_with_kernel_type(); + p->build_with_kernel_type(); + p->build_with_kernel_type(); + p->build_with_kernel_type(); + p->build_with_kernel_type(); + p->build_with_kernel_type(); + #endif // Create buffers d_a = new buffer(array_size); @@ -81,8 +95,9 @@ SYCLStream::~SYCLStream() delete d_b; delete d_c; delete d_sum; - - + #ifdef COMPUTECPP_CE + delete p; + #endif delete queue; devices.clear(); } @@ -94,7 +109,12 @@ void SYCLStream::copy() { auto ka = d_a->template get_access(cgh); auto kc = d_c->template get_access(cgh); + #ifdef COMPUTECPP_CE + cgh.parallel_for(p->get_kernel(), + range<1>{array_size}, [=](item<1> item) + #else cgh.parallel_for(range<1>{array_size}, [=](item<1> item) + #endif { auto id = item.get_id(0); kc[id] = ka[id]; @@ -111,7 +131,12 @@ void SYCLStream::mul() { auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); + #ifdef COMPUTECPP_CE + cgh.parallel_for(p->get_kernel(), + range<1>{array_size}, [=](item<1> item) + #else cgh.parallel_for(range<1>{array_size}, [=](item<1> item) + #endif { auto id = item.get_id(0); kb[id] = scalar * kc[id]; @@ -128,7 +153,12 @@ void SYCLStream::add() auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); + #ifdef COMPUTECPP_CE + cgh.parallel_for(p->get_kernel(), + range<1>{array_size}, [=](item<1> item) + #else cgh.parallel_for(range<1>{array_size}, [=](item<1> item) + #endif { auto id = item.get_id(0); kc[id] = ka[id] + kb[id]; @@ -146,7 +176,12 @@ void SYCLStream::triad() auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); + #ifdef COMPUTECPP_CE + cgh.parallel_for(p->get_kernel(), + range<1>{array_size}, [=](item<1> item) + #else cgh.parallel_for(range<1>{array_size}, [=](item<1> item) + #endif { auto id = item.get_id(0); ka[id] = kb[id] + scalar * kc[id]; @@ -167,8 +202,12 @@ T SYCLStream::dot() auto wg_sum = accessor(range<1>(dot_wgsize), cgh); size_t N = array_size; - + #ifdef COMPUTECPP_CE + cgh.parallel_for(p->get_kernel(), + nd_range<1>(dot_num_groups*dot_wgsize, dot_wgsize), [=](nd_item<1> item) + #else cgh.parallel_for(nd_range<1>(dot_num_groups*dot_wgsize, dot_wgsize), [=](nd_item<1> item) + #endif { size_t i = item.get_global_id(0); size_t li = item.get_local_id(0); @@ -209,7 +248,12 @@ void SYCLStream::init_arrays(T initA, T initB, T initC) auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); + #ifdef COMPUTECPP_CE + cgh.parallel_for(p->get_kernel(), + range<1>{array_size}, [=](item<1> item) + #else cgh.parallel_for(range<1>{array_size}, [=](item<1> item) + #endif { auto id = item.get_id(0); ka[id] = initA; diff --git a/SYCLStream.h b/SYCLStream.h index ab62ecd..9c06f6f 100644 --- a/SYCLStream.h +++ b/SYCLStream.h @@ -15,6 +15,9 @@ #define IMPLEMENTATION_STRING "SYCL" +// allows a use of 'parallel_for' currently known to be supported by ComputeCpp +#define COMPUTECPP_CE + namespace sycl_kernels { template class init;