diff --git a/CHANGELOG.md b/CHANGELOG.md index 1530392..3e1040d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ All notable changes to this project will be documented in this file. - Normalise sum result by expected value to help false negative errors. - HC version deprecated and moved to a legacy directory. - Update RAJA to v0.13.0 (w/ code changes as this is a source incompatible update). +- Update SYCL version to SYCL 2020. ### Removed - Pre-building of kernels in SYCL version to ensure compatibility with SYCL 1.2.1. diff --git a/CMakeLists.txt b/CMakeLists.txt index 58e0a3b..ad12dbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,6 +123,7 @@ register_model(hip HIP HIPStream.cpp) register_model(cuda CUDA CUDAStream.cu) register_model(kokkos KOKKOS KokkosStream.cpp) register_model(sycl SYCL SYCLStream.cpp) +register_model(sycl2020 SYCL2020 SYCLStream2020.cpp) register_model(acc ACC ACCStream.cpp) # defining RAJA collides with the RAJA namespace so USE_RAJA register_model(raja USE_RAJA RAJAStream.cpp) @@ -206,4 +207,4 @@ if (COMMAND setup_target) setup_target(${EXE_NAME}) endif () -install(TARGETS ${EXE_NAME} DESTINATION bin) \ No newline at end of file +install(TARGETS ${EXE_NAME} DESTINATION bin) diff --git a/src/main.cpp b/src/main.cpp index 2791bdc..5a01b74 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -43,6 +43,8 @@ #include "ACCStream.h" #elif defined(SYCL) #include "SYCLStream.h" +#elif defined(SYCL2020) +#include "SYCLStream2020.h" #elif defined(OMP) #include "OMPStream.h" #endif @@ -282,7 +284,7 @@ void run() // Use the OpenACC implementation stream = new ACCStream(ARRAY_SIZE, deviceIndex); -#elif defined(SYCL) +#elif defined(SYCL) || defined(SYCL2020) // Use the SYCL implementation stream = new SYCLStream(ARRAY_SIZE, deviceIndex); diff --git a/src/sycl2020/SYCLStream2020.cpp b/src/sycl2020/SYCLStream2020.cpp new file mode 100644 index 0000000..17a5ab5 --- /dev/null +++ b/src/sycl2020/SYCLStream2020.cpp @@ -0,0 +1,284 @@ + +// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, +// University of Bristol HPC +// +// For full license terms please see the LICENSE file distributed with this +// source code + +#include "SYCLStream2020.h" + +#include + +// Cache list of devices +bool cached = false; +std::vector devices; +void getDeviceList(void); + +template +SYCLStream::SYCLStream(const size_t ARRAY_SIZE, const int device_index) +: array_size {ARRAY_SIZE}, + d_a {ARRAY_SIZE}, + d_b {ARRAY_SIZE}, + d_c {ARRAY_SIZE}, + d_sum {1} +{ + if (!cached) + getDeviceList(); + + if (device_index >= devices.size()) + throw std::runtime_error("Invalid device index"); + + sycl::device dev = devices[device_index]; + + // Print out device information + std::cout << "Using SYCL device " << getDeviceName(device_index) << std::endl; + std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl; + + // Check device can support FP64 if needed + if (sizeof(T) == sizeof(double)) + { + if (!dev.has(sycl::aspect::fp64)) + { + throw std::runtime_error("Device does not support double precision, please use --float"); + } + } + + queue = std::make_unique(dev, sycl::async_handler{[&](sycl::exception_list l) + { + bool error = false; + for(auto e: l) + { + try + { + std::rethrow_exception(e); + } + catch (sycl::exception e) + { + std::cout << e.what(); + error = true; + } + } + if(error) + { + throw std::runtime_error("SYCL errors detected"); + } + }}); + + // No longer need list of devices + devices.clear(); + cached = true; + + +} + + +template +void SYCLStream::copy() +{ + queue->submit([&](sycl::handler &cgh) + { + sycl::accessor ka {d_a, cgh, sycl::read_only}; + sycl::accessor kc {d_c, cgh, sycl::write_only}; + cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + { + kc[idx] = ka[idx]; + }); + }); + queue->wait(); +} + +template +void SYCLStream::mul() +{ + const T scalar = startScalar; + queue->submit([&](sycl::handler &cgh) + { + sycl::accessor kb {d_b, cgh, sycl::write_only}; + sycl::accessor kc {d_c, cgh, sycl::read_only}; + cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + { + kb[idx] = scalar * kc[idx]; + }); + }); + queue->wait(); +} + +template +void SYCLStream::add() +{ + queue->submit([&](sycl::handler &cgh) + { + sycl::accessor ka {d_a, cgh, sycl::read_only}; + sycl::accessor kb {d_b, cgh, sycl::read_only}; + sycl::accessor kc {d_c, cgh, sycl::write_only}; + cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + { + kc[idx] = ka[idx] + kb[idx]; + }); + }); + queue->wait(); +} + +template +void SYCLStream::triad() +{ + const T scalar = startScalar; + queue->submit([&](sycl::handler &cgh) + { + sycl::accessor ka {d_a, cgh, sycl::write_only}; + sycl::accessor kb {d_b, cgh, sycl::read_only}; + sycl::accessor kc {d_c, cgh, sycl::read_only}; + cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + { + ka[idx] = kb[idx] + scalar * kc[idx]; + }); + }); + queue->wait(); +} + +template +void SYCLStream::nstream() +{ + const T scalar = startScalar; + + queue->submit([&](sycl::handler &cgh) + { + sycl::accessor ka {d_a, cgh}; + sycl::accessor kb {d_b, cgh, sycl::read_only}; + sycl::accessor kc {d_c, cgh, sycl::read_only}; + cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + { + ka[idx] += kb[idx] + scalar * kc[idx]; + }); + }); + queue->wait(); +} + +template +T SYCLStream::dot() +{ + + queue->submit([&](sycl::handler &cgh) + { + sycl::accessor ka {d_a, cgh, sycl::read_only}; + sycl::accessor kb {d_b, cgh, sycl::read_only}; + + cgh.parallel_for(sycl::range<1>{array_size}, + // Reduction object, to perform summation - initialises the result to zero + sycl::reduction(d_sum, cgh, std::plus(), sycl::property::reduction::initialize_to_identity{}), + [=](sycl::id<1> idx, auto& sum) + { + sum += ka[idx] * kb[idx]; + }); + + }); + + // Get access on the host, and return a copy of the data (single number) + // This will block until the result is available, so no need to wait on the queue. + sycl::host_accessor result {d_sum, sycl::read_only}; + return result[0]; + +} + +template +void SYCLStream::init_arrays(T initA, T initB, T initC) +{ + queue->submit([&](sycl::handler &cgh) + { + sycl::accessor ka {d_a, cgh, sycl::write_only, sycl::no_init}; + sycl::accessor kb {d_b, cgh, sycl::write_only, sycl::no_init}; + sycl::accessor kc {d_c, cgh, sycl::write_only, sycl::no_init}; + + cgh.parallel_for(sycl::range<1>{array_size}, [=](sycl::id<1> idx) + { + ka[idx] = initA; + kb[idx] = initB; + kc[idx] = initC; + }); + }); + + queue->wait(); +} + +template +void SYCLStream::read_arrays(std::vector& a, std::vector& b, std::vector& c) +{ + sycl::host_accessor _a {d_a, sycl::read_only}; + sycl::host_accessor _b {d_b, sycl::read_only}; + sycl::host_accessor _c {d_c, sycl::read_only}; + for (int i = 0; i < array_size; i++) + { + a[i] = _a[i]; + b[i] = _b[i]; + c[i] = _c[i]; + } +} + +void getDeviceList(void) +{ + // Ask SYCL runtime for all devices in system + devices = sycl::device::get_devices(); + cached = true; +} + +void listDevices(void) +{ + getDeviceList(); + + // Print device names + if (devices.size() == 0) + { + std::cerr << "No devices found." << std::endl; + } + else + { + std::cout << std::endl; + std::cout << "Devices:" << std::endl; + for (int i = 0; i < devices.size(); i++) + { + std::cout << i << ": " << getDeviceName(i) << std::endl; + } + std::cout << std::endl; + } +} + +std::string getDeviceName(const int device) +{ + if (!cached) + getDeviceList(); + + std::string name; + + if (device < devices.size()) + { + name = devices[device].get_info(); + } + else + { + throw std::runtime_error("Error asking for name for non-existant device"); + } + + return name; +} + +std::string getDeviceDriver(const int device) +{ + if (!cached) + getDeviceList(); + + std::string driver; + + if (device < devices.size()) + { + driver = devices[device].get_info(); + } + else + { + throw std::runtime_error("Error asking for driver for non-existant device"); + } + + return driver; +} + +template class SYCLStream; +template class SYCLStream; diff --git a/src/sycl2020/SYCLStream2020.h b/src/sycl2020/SYCLStream2020.h new file mode 100644 index 0000000..7481d16 --- /dev/null +++ b/src/sycl2020/SYCLStream2020.h @@ -0,0 +1,54 @@ + +// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, +// University of Bristol HPC +// +// For full license terms please see the LICENSE file distributed with this +// source code + +#pragma once + +#include +#include + +#include "Stream.h" + +#include + +#define IMPLEMENTATION_STRING "SYCL 2020" + +template +class SYCLStream : public Stream +{ + protected: + // Size of arrays + size_t array_size; + + // SYCL objects + // Queue is a pointer because we allow device selection + std::unique_ptr queue; + + // Buffers + sycl::buffer d_a; + sycl::buffer d_b; + sycl::buffer d_c; + sycl::buffer d_sum; + + public: + + SYCLStream(const size_t, const int); + ~SYCLStream() = default; + + virtual void copy() override; + virtual void add() override; + virtual void mul() override; + virtual void triad() override; + virtual void nstream() override; + virtual T dot() override; + + virtual void init_arrays(T initA, T initB, T initC) override; + virtual void read_arrays(std::vector& a, std::vector& b, std::vector& c) override; + +}; + +// Populate the devices list +void getDeviceList(void); diff --git a/src/sycl2020/model.cmake b/src/sycl2020/model.cmake new file mode 100644 index 0000000..e7b5a1c --- /dev/null +++ b/src/sycl2020/model.cmake @@ -0,0 +1,86 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that is supported by CMake detection, this is used for host compilation when required by the SYCL compiler" + "c++") + +register_flag_required(SYCL_COMPILER + "Compile using the specified SYCL compiler implementation + Supported values are + ONEAPI-DPCPP - dpc++ that is part of an oneAPI Base Toolkit distribution (https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit.html) + DPCPP - dpc++ as a standalone compiler (https://github.com/intel/llvm) + HIPSYCL - hipSYCL compiler (https://github.com/illuhad/hipSYCL) + COMPUTECPP - ComputeCpp compiler (https://developer.codeplay.com/products/computecpp/ce/home)") + +register_flag_optional(SYCL_COMPILER_DIR + "Absolute path to the selected SYCL compiler directory, most are packaged differently so set the path according to `SYCL_COMPILER`: + ONEAPI-DPCPP - not required but `dpcpp` must be on PATH, load oneAPI as per documentation (i.e `source /opt/intel/oneapi/setvars.sh` first) + HIPSYCL|DPCPP|COMPUTECPP - set to the root of the binary distribution that contains at least `bin/`, `include/`, and `lib/`" + "") + +register_flag_optional(OpenCL_LIBRARY + "[ComputeCpp only] Path to OpenCL library, usually called libOpenCL.so" + "${OpenCL_LIBRARY}") + +macro(setup) + set(CMAKE_CXX_STANDARD 17) + + + if (${SYCL_COMPILER} STREQUAL "HIPSYCL") + + + set(hipSYCL_DIR ${SYCL_COMPILER_DIR}/lib/cmake/hipSYCL) + + if (NOT EXISTS "${hipSYCL_DIR}") + message(WARNING "Falling back to hipSYCL < 0.9.0 CMake structure") + set(hipSYCL_DIR ${SYCL_COMPILER_DIR}/lib/cmake) + endif () + if (NOT EXISTS "${hipSYCL_DIR}") + message(FATAL_ERROR "Can't find the appropriate CMake definitions for hipSYCL") + endif () + + # register_definitions(_GLIBCXX_USE_CXX11_ABI=0) + find_package(hipSYCL CONFIG REQUIRED) + message(STATUS "ok") + + elseif (${SYCL_COMPILER} STREQUAL "COMPUTECPP") + + list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) + set(ComputeCpp_DIR ${SYCL_COMPILER_DIR}) + + setup_opencl_header_includes() + + register_definitions(CL_TARGET_OPENCL_VERSION=220 _GLIBCXX_USE_CXX11_ABI=0) + # ComputeCpp needs OpenCL + find_package(ComputeCpp REQUIRED) + + # this must come after FindComputeCpp (!) + set(COMPUTECPP_USER_FLAGS -O3 -no-serial-memop) + + elseif (${SYCL_COMPILER} STREQUAL "DPCPP") + set(CMAKE_CXX_COMPILER ${SYCL_COMPILER_DIR}/bin/clang++) + include_directories(${SYCL_COMPILER_DIR}/include/sycl) + register_definitions(CL_TARGET_OPENCL_VERSION=220) + register_append_cxx_flags(ANY -fsycl) + register_append_link_flags(-fsycl) + elseif (${SYCL_COMPILER} STREQUAL "ONEAPI-DPCPP") + set(CMAKE_CXX_COMPILER dpcpp) + register_definitions(CL_TARGET_OPENCL_VERSION=220) + else () + message(FATAL_ERROR "SYCL_COMPILER=${SYCL_COMPILER} is unsupported") + endif () + +endmacro() + + +macro(setup_target NAME) + if ( + (${SYCL_COMPILER} STREQUAL "COMPUTECPP") OR + (${SYCL_COMPILER} STREQUAL "HIPSYCL")) + # so ComputeCpp and hipSYCL has this weird (and bad) CMake usage where they append their + # own custom integration header flags AFTER the target has been specified + # hence this macro here + add_sycl_to_target( + TARGET ${NAME} + SOURCES ${IMPL_SOURCES}) + endif () +endmacro()