diff --git a/CMakeLists.txt b/CMakeLists.txt index 71a95d6..b078ab5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,9 +40,18 @@ endif () # TODO: Find SYCL implementations somehow if (true) - add_executable(gpu-stream-sycl main.cpp SYCLStream.cpp) - target_compile_definitions(gpu-stream-sycl PUBLIC SYCL) - set_property(TARGET gpu-stream-sycl PROPERTY CXX_STANDARD 14) + # ComputeCpp + # TODO: Sort this out properly! + add_custom_target(gpu-stream-sycl + COMMAND compute++ ${CMAKE_CURRENT_SOURCE_DIR}/SYCLStream.cpp -sycl -O2 -emit-llvm -o SYCLStream.bc -c + COMMAND /usr/bin/c++ ${CMAKE_CURRENT_SOURCE_DIR}/SYCLStream.cpp -O2 -std=c++11 -include SYCLStream.sycl -o SYCLStream.o -c + COMMAND /usr/bin/c++ ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp -O2 -std=c++11 SYCLStream.o -include SYCLStream.sycl -lSYCL -lOpenCL -o gpu-stream-sycl -DSYCL + ) + + # triSYCL + #add_executable(gpu-stream-sycl main.cpp SYCLStream.cpp) + #target_compile_definitions(gpu-stream-sycl PUBLIC SYCL) + #set_property(TARGET gpu-stream-sycl PROPERTY CXX_STANDARD 14) else () message("Skipping SYCL...") endif () diff --git a/SYCLStream.cpp b/SYCLStream.cpp index 1314aa4..1c4ed17 100644 --- a/SYCLStream.cpp +++ b/SYCLStream.cpp @@ -17,14 +17,17 @@ SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) array_size = ARRAY_SIZE; // Create buffers - d_a = buffer(array_size); - d_b = buffer(array_size); - d_c = buffer(array_size); + d_a = new buffer(array_size); + d_b = new buffer(array_size); + d_c = new buffer(array_size); } template SYCLStream::~SYCLStream() { + delete d_a; + delete d_b; + delete d_c; } template @@ -32,9 +35,9 @@ void SYCLStream::copy() { queue.submit([&](handler &cgh) { - auto ka = d_a.template get_access(cgh); - auto kc = d_c.template get_access(cgh); - cgh.parallel_for(range<1>{array_size}, [=](id<1> index) + auto ka = d_a->template get_access(cgh); + auto kc = d_c->template get_access(cgh); + cgh.parallel_for(range<1>{array_size}, [=](id<1> index) { kc[index] = ka[index]; }); @@ -48,9 +51,9 @@ void SYCLStream::mul() const T scalar = 3.0; queue.submit([&](handler &cgh) { - auto kb = d_b.template get_access(cgh); - auto kc = d_c.template get_access(cgh); - cgh.parallel_for(range<1>{array_size}, [=](id<1> index) + auto kb = d_b->template get_access(cgh); + auto kc = d_c->template get_access(cgh); + cgh.parallel_for(range<1>{array_size}, [=](id<1> index) { kb[index] = scalar * kc[index]; }); @@ -63,10 +66,10 @@ void SYCLStream::add() { queue.submit([&](handler &cgh) { - auto ka = d_a.template get_access(cgh); - auto kb = d_b.template get_access(cgh); - auto kc = d_c.template get_access(cgh); - cgh.parallel_for(range<1>{array_size}, [=](id<1> index) + auto ka = d_a->template get_access(cgh); + auto kb = d_b->template get_access(cgh); + auto kc = d_c->template get_access(cgh); + cgh.parallel_for(range<1>{array_size}, [=](id<1> index) { kc[index] = ka[index] + kb[index]; }); @@ -80,10 +83,10 @@ void SYCLStream::triad() const T scalar = 3.0; queue.submit([&](handler &cgh) { - auto ka = d_a.template get_access(cgh); - auto kb = d_b.template get_access(cgh); - auto kc = d_c.template get_access(cgh); - cgh.parallel_for(range<1>{array_size}, [=](id<1> index){ + auto ka = d_a->template get_access(cgh); + auto kb = d_b->template get_access(cgh); + auto kc = d_c->template get_access(cgh); + cgh.parallel_for(range<1>{array_size}, [=](id<1> index){ ka[index] = kb[index] + scalar * kc[index]; }); }); @@ -93,9 +96,9 @@ void SYCLStream::triad() template void SYCLStream::write_arrays(const std::vector& a, const std::vector& b, const std::vector& c) { - auto _a = d_a.template get_access(); - auto _b = d_b.template get_access(); - auto _c = d_c.template get_access(); + auto _a = d_a->template get_access(); + auto _b = d_b->template get_access(); + auto _c = d_c->template get_access(); for (int i = 0; i < array_size; i++) { _a[i] = a[i]; @@ -107,9 +110,9 @@ void SYCLStream::write_arrays(const std::vector& a, const std::vector& template void SYCLStream::read_arrays(std::vector& a, std::vector& b, std::vector& c) { - auto _a = d_a.template get_access(); - auto _b = d_b.template get_access(); - auto _c = d_c.template get_access(); + auto _a = d_a->template get_access(); + auto _b = d_b->template get_access(); + auto _c = d_c->template get_access(); for (int i = 0; i < array_size; i++) { a[i] = _a[i]; @@ -140,5 +143,6 @@ std::string getDeviceDriver(const int device) } -template class SYCLStream; +// TODO: Fix kernel names to allow multiple template specializations +//template class SYCLStream; template class SYCLStream; diff --git a/SYCLStream.h b/SYCLStream.h index e10535e..f4d79d5 100644 --- a/SYCLStream.h +++ b/SYCLStream.h @@ -22,9 +22,9 @@ class SYCLStream : public Stream // SYCL objects cl::sycl::queue queue; - cl::sycl::buffer d_a; - cl::sycl::buffer d_b; - cl::sycl::buffer d_c; + cl::sycl::buffer *d_a; + cl::sycl::buffer *d_b; + cl::sycl::buffer *d_c; public: diff --git a/main.cpp b/main.cpp index f67fdd6..4794f9b 100644 --- a/main.cpp +++ b/main.cpp @@ -57,9 +57,12 @@ int main(int argc, char *argv[]) parseArguments(argc, argv); + // TODO: Fix SYCL to allow multiple template specializations +#ifndef SYCL if (use_float) run(); else +#endif run(); }