# Git unified diff for SYCLStream.cpp (blob 1c4ed17 -> fc76a82, file mode 100644).
#
# NOTE: the entire patch below sits on ONE physical line. The per-line hunk
# structure (' ' context, '-' removed, '+' added) has to be restored before
# `git apply` / `patch` can consume it.
#
# What the patch does, hunk by hunk:
#   * Hunk 1 (@@ -11,6 +11,8 @@): after `using namespace cl::sycl;` it adds the
#     macro `#define WGSIZE 64` followed by one added blank line.
#   * Hunks 2-5 (section headings copy(), mul(), add(), triad()): every
#     `cgh.parallel_for` launch changes from
#         range<1>{array_size}            with lambda argument `id<1> index`
#     to
#         nd_range<1>{array_size, WGSIZE} with lambda argument `nd_item<1> item`
#     and every accessor subscript changes from `index` to `item.get_global()`.
#     The arithmetic in each kernel body is otherwise untouched:
#         copy:  kc = ka
#         mul:   kb = scalar * kc
#         add:   kc = ka + kb
#         triad: ka = kb + scalar * kc
#   * Hunk 5 additionally moves the kernel lambda's opening `{` from the end of
#     the `parallel_for` line onto its own line, which is why that hunk grows
#     from 8 to 9 lines.
#
# NOTE(review): nd_range presumably requires the global size (array_size) to be
#   an exact multiple of the work-group size (WGSIZE = 64); nothing in this
#   patch pads or validates array_size - confirm against the SYCL specification
#   and the callers that choose ARRAY_SIZE.
# NOTE(review): `item.get_global()` looks like the older spelling of the
#   global-id query; newer SYCL revisions may expect `get_global_id()` - confirm
#   against the SYCL version this project targets.
# NOTE(review): `template SYCLStream::SYCLStream(` and `->template get_access(cgh)`
#   carry no template-argument lists; presumably the angle-bracket arguments
#   were lost when this text was extracted - verify against the original commit
#   before relying on the context lines.
diff --git a/SYCLStream.cpp b/SYCLStream.cpp index 1c4ed17..fc76a82 100644 --- a/SYCLStream.cpp +++ b/SYCLStream.cpp @@ -11,6 +11,8 @@ using namespace cl::sycl; +#define WGSIZE 64 + template SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) { @@ -37,9 +39,9 @@ void SYCLStream::copy() { auto ka = d_a->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(range<1>{array_size}, [=](id<1> index) + cgh.parallel_for(nd_range<1>{array_size, WGSIZE}, [=](nd_item<1> item) { - kc[index] = ka[index]; + kc[item.get_global()] = ka[item.get_global()]; }); }); queue.wait(); @@ -53,9 +55,9 @@ void SYCLStream::mul() { auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(range<1>{array_size}, [=](id<1> index) + cgh.parallel_for(nd_range<1>{array_size, WGSIZE}, [=](nd_item<1> item) { - kb[index] = scalar * kc[index]; + kb[item.get_global()] = scalar * kc[item.get_global()]; }); }); queue.wait(); @@ -69,9 +71,9 @@ void SYCLStream::add() auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(range<1>{array_size}, [=](id<1> index) + cgh.parallel_for(nd_range<1>{array_size, WGSIZE}, [=](nd_item<1> item) { - kc[index] = ka[index] + kb[index]; + kc[item.get_global()] = ka[item.get_global()] + kb[item.get_global()]; }); }); queue.wait(); @@ -86,8 +88,9 @@ void SYCLStream::triad() auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); - cgh.parallel_for(range<1>{array_size}, [=](id<1> index){ - ka[index] = kb[index] + scalar * kc[index]; + cgh.parallel_for(nd_range<1>{array_size, WGSIZE}, [=](nd_item<1> item) + { + ka[item.get_global()] = kb[item.get_global()] + scalar * kc[item.get_global()]; }); }); queue.wait();