diff --git a/SYCLStream.cpp b/SYCLStream.cpp index 171c9f7..ebe5f63 100644 --- a/SYCLStream.cpp +++ b/SYCLStream.cpp @@ -11,7 +11,6 @@ using namespace cl::sycl; - // Cache list of devices bool cached = false; std::vector devices; @@ -67,16 +66,7 @@ SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) throw std::runtime_error("SYCL errors detected"); } }); - - /* Pre-build the kernels */ - p = new program(queue->get_context()); - p->build_with_kernel_type(); - p->build_with_kernel_type(); - p->build_with_kernel_type(); - p->build_with_kernel_type(); - p->build_with_kernel_type(); - p->build_with_kernel_type(); - + // Create buffers d_a = new buffer(array_size); d_b = new buffer(array_size); @@ -91,7 +81,6 @@ SYCLStream::~SYCLStream() delete d_b; delete d_c; delete d_sum; - delete p; delete queue; devices.clear(); @@ -104,8 +93,7 @@ void SYCLStream::copy() { auto ka = d_a->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), - range<1>{array_size}, [=](item<1> item) + cgh.parallel_for(range<1>{array_size}, [=](item<1> item) { auto id = item.get_id(0); kc[id] = ka[id]; @@ -122,8 +110,7 @@ void SYCLStream::mul() { auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), - range<1>{array_size}, [=](item<1> item) + cgh.parallel_for(range<1>{array_size}, [=](item<1> item) { auto id = item.get_id(0); kb[id] = scalar * kc[id]; @@ -140,8 +127,7 @@ void SYCLStream::add() auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), - range<1>{array_size}, [=](item<1> item) + cgh.parallel_for(range<1>{array_size}, [=](item<1> item) { auto id = item.get_id(0); kc[id] = ka[id] + kb[id]; @@ -159,8 +145,7 @@ void SYCLStream::triad() auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), - range<1>{array_size}, [=](item<1> item) + cgh.parallel_for(range<1>{array_size}, [=](item<1> item) { auto id = item.get_id(0); ka[id] = kb[id] + scalar * kc[id]; @@ -181,12 +166,10 @@ T SYCLStream::dot() auto wg_sum = accessor(range<1>(dot_wgsize), cgh); size_t N = array_size; - - cgh.parallel_for(p->get_kernel(), - nd_range<1>(dot_num_groups*dot_wgsize, dot_wgsize), [=](nd_item<1> item) + cgh.parallel_for(nd_range<1>(dot_num_groups*dot_wgsize, dot_wgsize), [=](nd_item<1> item) { - size_t i = item.get_global(0); - size_t li = item.get_local(0); + size_t i = item.get_global_id(0); + size_t li = item.get_local_id(0); size_t global_size = item.get_global_range()[0]; wg_sum[li] = 0.0; @@ -224,8 +207,7 @@ void SYCLStream::init_arrays(T initA, T initB, T initC) auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(p->get_kernel(), - range<1>{array_size}, [=](item<1> item) + cgh.parallel_for(range<1>{array_size}, [=](item<1> item) { auto id = item.get_id(0); ka[id] = initA;