From a66696d97146fd378213eaa1c1ad62a7dc74a241 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Thu, 11 Nov 2021 23:11:04 +0000 Subject: [PATCH 1/6] Initial Thrust implementation --- CMakeLists.txt | 1 + THRUST.cmake | 87 +++++++++++++++++ ThrustStream.cu | 235 +++++++++++++++++++++++++++++++++++++++++++++ ThrustStream.h | 43 +++++++++ ci-test-compile.sh | 7 ++ main.cpp | 6 ++ 6 files changed, 379 insertions(+) create mode 100644 THRUST.cmake create mode 100644 ThrustStream.cu create mode 100644 ThrustStream.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 797a9c0..42abfd8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,6 +113,7 @@ register_model(ACC ACC ACCStream.cpp) # defining RAJA collides with the RAJA namespace so USE_RAJA register_model(RAJA USE_RAJA RAJAStream.cpp) register_model(TBB TBB TBBStream.cpp) +register_model(THRUST THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model") diff --git a/THRUST.cmake b/THRUST.cmake new file mode 100644 index 0000000..1b94bf5 --- /dev/null +++ b/THRUST.cmake @@ -0,0 +1,87 @@ + +register_flag_optional(THRUST_IMPL + "Which Thrust implementation to use, supported options include: + - CUDA (via https://github.com/NVIDIA/thrust) + - ROCM (VIA https://github.com/ROCmSoftwarePlatform/rocThrust) + " + "CUDA") + +register_flag_optional(SDK_DIR + "Path to the selected Thrust implementation (e.g `/opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/include/thrust/` for NVHPC, `/opt/rocm` for ROCm)" + "") + +register_flag_optional(BACKEND + "[THRUST_IMPL==CUDA] CUDA's Thrust implementation supports the following backends: + - CUDA + - OMP + - TBB + " + "CUDA") + +register_flag_optional(CMAKE_CUDA_COMPILER + "[THRUST_IMPL==CUDA] Path to the CUDA nvcc compiler" + "") + +# XXX we may want to drop this eventually and use CMAKE_CUDA_ARCHITECTURES directly +register_flag_optional(CUDA_ARCH + "[THRUST_IMPL==CUDA] Nvidia architecture, will be passed in 
via `-arch=` (e.g `sm_70`) for nvcc" + "") + +register_flag_optional(CUDA_EXTRA_FLAGS + "[THRUST_IMPL==CUDA] Additional CUDA flags passed to nvcc, this is appended after `CUDA_ARCH`" + "") + + +macro(setup) + set(CMAKE_CXX_STANDARD 14) + + if (${THRUST_IMPL} STREQUAL "CUDA") + + # see CUDA.cmake, we're only adding a few Thrust related libraries here + + if (POLICY CMP0104) + cmake_policy(SET CMP0104 OLD) + endif () + + # add -forward-unknown-to-host-compiler for compatibility reasons + set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--expt-extended-lambda -forward-unknown-to-host-compiler -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS}) + enable_language(CUDA) + # CMake defaults to -O2 for CUDA at Release, let's wipe that and use the global RELEASE_FLAG + # appended later + wipe_gcc_style_optimisation_flags(CMAKE_CUDA_FLAGS_${BUILD_TYPE}) + + message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS} ${CMAKE_CUDA_FLAGS_${BUILD_TYPE}}") + + + if (SDK_DIR) + find_package(CUB REQUIRED CONFIG PATHS ${SDK_DIR}/cub) + find_package(Thrust REQUIRED CONFIG PATHS ${SDK_DIR}/thrust) + else () + find_package(CUB REQUIRED CONFIG) + find_package(Thrust REQUIRED CONFIG) + endif () + + message(STATUS "Using Thrust backend: ${BACKEND}") + + # this creates the interface that we can link to + thrust_create_target(Thrust HOST CPP DEVICE ${BACKEND}) + + register_link_library(Thrust) + elseif (${THRUST_IMPL} STREQUAL "ROCM") + if (SDK_DIR) + find_package(rocprim REQUIRED CONFIG PATHS ${SDK_DIR}/rocprim) + find_package(rocthrust REQUIRED CONFIG PATHS ${SDK_DIR}/rocthrust) + else () + find_package(rocprim REQUIRED CONFIG) + find_package(rocthrust REQUIRED CONFIG) + endif () + register_link_library(roc::rocthrust) + else () + message(FATAL_ERROR "Unsupported THRUST_IMPL provided: ${THRUST_IMPL}") + endif () + + +endmacro() + + + \ No newline at end of file diff --git a/ThrustStream.cu b/ThrustStream.cu new file mode 100644 index 0000000..3a57ab0 --- /dev/null +++ b/ThrustStream.cu @@ -0,0 +1,235 @@ +// Copyright 
(c) 2020 Tom Deakin +// University of Bristol HPC +// +// For full license terms please see the LICENSE file distributed with this +// source code + +#include "ThrustStream.h" +#include +#include +#include +#include + +static inline void synchronise() +{ +// rocThrust doesn't synchronise between thrust calls +#if defined(THRUST_DEVICE_SYSTEM_HIP) && THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_HIP + hipDeviceSynchronize(); +#endif +} + +template +ThrustStream::ThrustStream(const int ARRAY_SIZE, int device) + : array_size{ARRAY_SIZE}, a(array_size), b(array_size), c(array_size) { + std::cout << "Using CUDA device: " << getDeviceName(device) << std::endl; + std::cout << "Driver: " << getDeviceDriver(device) << std::endl; + std::cout << "Thrust version: " << THRUST_VERSION << std::endl; + +#if THRUST_DEVICE_SYSTEM == 0 + // as per Thrust docs, 0 is reserved for undefined backend + std::cout << "Thrust backend: undefined" << std::endl; +#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA + std::cout << "Thrust backend: CUDA" << std::endl; +#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_OMP + std::cout << "Thrust backend: OMP" << std::endl; +#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_TBB + std::cout << "Thrust backend: TBB" << std::endl; +#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CPP + std::cout << "Thrust backend: CPP" << std::endl; +#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_TBB + std::cout << "Thrust backend: TBB" << std::endl; +#else + +#if defined(THRUST_DEVICE_SYSTEM_HIP) && THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_HIP + std::cout << "Thrust backend: HIP" << std::endl; +#else + std::cout << "Thrust backend: " << THRUST_DEVICE_SYSTEM << "(unknown)" << std::endl; +#endif + +#endif + +} + +template +void ThrustStream::init_arrays(T initA, T initB, T initC) +{ + thrust::fill(a.begin(), a.end(), initA); + thrust::fill(b.begin(), b.end(), initB); + thrust::fill(c.begin(), c.end(), initC); + synchronise(); +} + +template +void 
ThrustStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) +{ + thrust::copy(a.begin(), a.end(), h_a.begin()); + thrust::copy(b.begin(), b.end(), h_b.begin()); + thrust::copy(c.begin(), c.end(), h_c.begin()); +} + +template +void ThrustStream::copy() +{ + thrust::copy(a.begin(), a.end(),c.begin()); + synchronise(); +} + +template +void ThrustStream::mul() +{ + const T scalar = startScalar; + thrust::transform( + c.begin(), + c.end(), + b.begin(), + [=] __device__ __host__ (const T &ci){ + return ci * scalar; + } + ); + synchronise(); +} + +template +void ThrustStream::add() +{ + thrust::transform( + thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin())), + thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end())), + c.begin(), + thrust::make_zip_function( + [] __device__ __host__ (const T& ai, const T& bi){ + return ai + bi; + }) + ); + synchronise(); +} + +template +void ThrustStream::triad() +{ + const T scalar = startScalar; + thrust::transform( + thrust::make_zip_iterator(thrust::make_tuple(b.begin(), c.begin())), + thrust::make_zip_iterator(thrust::make_tuple(b.end(), c.end())), + a.begin(), + thrust::make_zip_function( + [=] __device__ __host__ (const T& bi, const T& ci){ + return bi + scalar * ci; + }) + ); + synchronise(); +} + +template +void ThrustStream::nstream() +{ + const T scalar = startScalar; + thrust::transform( + thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin(), c.begin())), + thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end(), c.end())), + a.begin(), + thrust::make_zip_function( + [=] __device__ __host__ (const T& ai, const T& bi, const T& ci){ + return ai + bi + scalar * ci; + }) + ); + synchronise(); +} + +template +T ThrustStream::dot() +{ + return thrust::inner_product(a.begin(), a.end(), b.begin(), T{}); +} + +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA || \ + (defined(THRUST_DEVICE_SYSTEM_HIP) && THRUST_DEVICE_SYSTEM_HIP == THRUST_DEVICE_SYSTEM) + +#ifdef 
__NVCC__ +#define IMPL_FN__(fn) cuda ## fn +#define IMPL_TYPE__(tpe) cuda ## tpe +#elif defined(__HIP_PLATFORM_HCC__) +#define IMPL_FN__(fn) hip ## fn +#define IMPL_TYPE__(tpe) hip ## tpe ## _t +#else +# error Unsupported compiler for Thrust +#endif + +void check_error(void) +{ + IMPL_FN__(Error_t) err = IMPL_FN__(GetLastError()); + if (err != IMPL_FN__(Success)) + { + std::cerr << "Error: " << IMPL_FN__(GetErrorString(err)) << std::endl; + exit(err); + } +} + +void listDevices(void) +{ + // Get number of devices + int count; + IMPL_FN__(GetDeviceCount(&count)); + check_error(); + + // Print device names + if (count == 0) + { + std::cerr << "No devices found." << std::endl; + } + else + { + std::cout << std::endl; + std::cout << "Devices:" << std::endl; + for (int i = 0; i < count; i++) + { + std::cout << i << ": " << getDeviceName(i) << std::endl; + } + std::cout << std::endl; + } +} + +std::string getDeviceName(const int device) +{ + IMPL_TYPE__(DeviceProp) props = {}; + IMPL_FN__(GetDeviceProperties(&props, device)); + check_error(); + return std::string(props.name); +} + + +std::string getDeviceDriver(const int device) +{ + IMPL_FN__(SetDevice(device)); + check_error(); + int driver; + IMPL_FN__(DriverGetVersion(&driver)); + check_error(); + return std::to_string(driver); +} + +#undef IMPL_FN__ +#undef IMPL_TYPE__ + +#else + +void listDevices(void) +{ + std::cout << "0: CPU" << std::endl; +} + +std::string getDeviceName(const int) +{ + return std::string("(device name unavailable)"); +} + +std::string getDeviceDriver(const int) +{ + return std::string("(device driver unavailable)"); +} + +#endif + +template class ThrustStream<float>; +template class ThrustStream<double>; + diff --git a/ThrustStream.h b/ThrustStream.h new file mode 100644 index 0000000..f87ace7 --- /dev/null +++ b/ThrustStream.h @@ -0,0 +1,43 @@ +// Copyright (c) 2020 Tom Deakin +// University of Bristol HPC +// +// For full license terms please see the LICENSE file distributed with this +// source code + 
+#pragma once + +#include +#include +#include + +#include "Stream.h" + +#define IMPLEMENTATION_STRING "Thrust" + +template +class ThrustStream : public Stream +{ + protected: + // Size of arrays + int array_size; + + thrust::device_vector a; + thrust::device_vector b; + thrust::device_vector c; + + public: + ThrustStream(const int, int); + ~ThrustStream() = default; + + virtual void copy() override; + virtual void add() override; + virtual void mul() override; + virtual void triad() override; + virtual void nstream() override; + virtual T dot() override; + + virtual void init_arrays(T initA, T initB, T initC) override; + virtual void read_arrays(std::vector& a, std::vector& b, std::vector& c) override; + +}; + diff --git a/ci-test-compile.sh b/ci-test-compile.sh index 00ca718..0a162f2 100755 --- a/ci-test-compile.sh +++ b/ci-test-compile.sh @@ -171,6 +171,11 @@ build_gcc() { # -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ # -DCUDA_ARCH=$NV_ARCH" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVSDK/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVSDK/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=OMP" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVSDK/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=TBB" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVSDK/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=CPP" + } build_clang() { @@ -220,6 +225,8 @@ build_aomp() { build_hip() { run_build hip_build "${HIP_CXX:?}" HIP "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}" + + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CXX_COMPILER=${HIP_CXX:?} -DSDK_DIR=$ROCM_PATH -DTHRUST_IMPL=ROCM" } build_icpx() { diff --git a/main.cpp b/main.cpp index 
de301ce..2791bdc 100644 --- a/main.cpp +++ b/main.cpp @@ -27,6 +27,8 @@ #include "STD20Stream.hpp" #elif defined(TBB) #include "TBBStream.hpp" +#elif defined(THRUST) +#include "ThrustStream.h" #elif defined(HIP) #include "HIPStream.h" #elif defined(HC) @@ -272,6 +274,10 @@ void run() // Use the C++20 implementation stream = new TBBStream(ARRAY_SIZE, deviceIndex); +#elif defined(THRUST) + // Use the Thrust implementation + stream = new ThrustStream(ARRAY_SIZE, deviceIndex); + #elif defined(ACC) // Use the OpenACC implementation stream = new ACCStream(ARRAY_SIZE, deviceIndex); From c2f75b90b3e2f02076d1e462f7a05aa72e433cd6 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Thu, 11 Nov 2021 23:30:04 +0000 Subject: [PATCH 2/6] Fix CI NVHPC path Fix CI ROCm install sources --- ci-prepare-bionic.sh | 7 +++++-- ci-test-compile.sh | 18 +++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/ci-prepare-bionic.sh b/ci-prepare-bionic.sh index fa3b2d2..3a3abac 100755 --- a/ci-prepare-bionic.sh +++ b/ci-prepare-bionic.sh @@ -143,9 +143,12 @@ setup_nvhpc() { local bin_dir="$sdk_dir/compilers/bin" "$bin_dir/makelocalrc" "$bin_dir" -x + export_var NVHPC_SDK_DIR "$sdk_dir" + export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.3" + export_var NVHPC_NVCXX "$bin_dir/nvc++" export_var NVHPC_NVCC "$sdk_dir/cuda/11.3/bin/nvcc" - export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.3" + echo "Installed CUDA versions:" ls "$sdk_dir/cuda" verify_bin_exists "$NVHPC_NVCXX" @@ -249,7 +252,7 @@ setup_clang_gcc() { setup_rocm() { wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add - - echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/debian/ xenial main' | sudo tee /etc/apt/sources.list.d/rocm.list + echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.5 ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list sudo apt-get update -qq sudo apt-get install -y -qq rocm-dev export_var ROCM_PATH "/opt/rocm" diff --git a/ci-test-compile.sh b/ci-test-compile.sh 
index 0a162f2..e56931c 100755 --- a/ci-test-compile.sh +++ b/ci-test-compile.sh @@ -92,11 +92,11 @@ run_build() { # GCC_CXX="/usr/bin/g++" # CLANG_CXX="/usr/bin/clang++" -# NVSDK="/home/tom/Downloads/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2/" -# NVHPC_NVCXX="$NVSDK/compilers/bin/nvc++" -# NVHPC_NVCC="$NVSDK/cuda/11.2/bin/nvcc" -# NVHPC_CUDA_DIR="$NVSDK/cuda/11.2" -# "$NVSDK/compilers/bin/makelocalrc" "$NVSDK/compilers/bin/" -x +# NVHPC_SDK_DIR="/home/tom/Downloads/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2/" +# NVHPC_NVCXX="$NVHPC_SDK_DIR/compilers/bin/nvc++" +# NVHPC_NVCC="$NVHPC_SDK_DIR/cuda/11.2/bin/nvcc" +# NVHPC_CUDA_DIR="$NVHPC_SDK_DIR/cuda/11.2" +# "$NVHPC_SDK_DIR/compilers/bin/makelocalrc" "$NVHPC_SDK_DIR/compilers/bin/" -x # AOCC_CXX="/opt/AMD/aocc-compiler-2.3.0/bin/clang++" # AOMP_CXX="/usr/lib/aomp/bin/clang++" @@ -171,10 +171,10 @@ build_gcc() { # -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ # -DCUDA_ARCH=$NV_ARCH" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVSDK/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVSDK/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=OMP" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVSDK/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=TBB" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVSDK/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=CPP" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} 
-DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=OMP" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=TBB" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=CPP" } From a463e88895e7725c002dad15cab1781cd76c0bd3 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Thu, 11 Nov 2021 23:50:27 +0000 Subject: [PATCH 3/6] Fix CI rocThrust build variables Fix CI CUDA cmake module include path Bump CI NVHPC version --- THRUST.cmake | 4 ++-- ci-prepare-bionic.sh | 10 +++++----- ci-test-compile.sh | 14 ++++++++------ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/THRUST.cmake b/THRUST.cmake index 1b94bf5..8aaef26 100644 --- a/THRUST.cmake +++ b/THRUST.cmake @@ -2,12 +2,12 @@ register_flag_optional(THRUST_IMPL "Which Thrust implementation to use, supported options include: - CUDA (via https://github.com/NVIDIA/thrust) - - ROCM (VIA https://github.com/ROCmSoftwarePlatform/rocThrust) + - ROCM (via https://github.com/ROCmSoftwarePlatform/rocThrust) " "CUDA") register_flag_optional(SDK_DIR - "Path to the selected Thrust implementation (e.g `/opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/include/thrust/` for NVHPC, `/opt/rocm` for ROCm)" + "Path to the selected Thrust implementation (e.g `/opt/nvidia/hpc_sdk/Linux_x86_64/21.9/cuda/include` for NVHPC, `/opt/rocm` for ROCm)" "") register_flag_optional(BACKEND diff --git a/ci-prepare-bionic.sh b/ci-prepare-bionic.sh index 3a3abac..462d321 100755 --- a/ci-prepare-bionic.sh +++ b/ci-prepare-bionic.sh @@ -135,19 +135,19 @@ setup_aocc() { setup_nvhpc() { echo "Preparing Nvidia HPC SDK" local tarball="nvhpc.tar.gz" -# local url="http://localhost:8000/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz" - local 
url="https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz" +# local url="http://localhost:8000/nvhpc_2021_219_Linux_x86_64_cuda_11.4.tar.gz" + local url="https://developer.download.nvidia.com/hpc-sdk/21.9/nvhpc_2021_219_Linux_x86_64_cuda_11.4.tar.gz" get_and_untar "$tarball" "$url" - local sdk_dir="$PWD/nvhpc_2021_215_Linux_x86_64_cuda_11.3/install_components/Linux_x86_64/21.5" + local sdk_dir="$PWD/nvhpc_2021_219_Linux_x86_64_cuda_11.4/install_components/Linux_x86_64/21.9" local bin_dir="$sdk_dir/compilers/bin" "$bin_dir/makelocalrc" "$bin_dir" -x export_var NVHPC_SDK_DIR "$sdk_dir" - export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.3" + export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.4" export_var NVHPC_NVCXX "$bin_dir/nvc++" - export_var NVHPC_NVCC "$sdk_dir/cuda/11.3/bin/nvcc" + export_var NVHPC_NVCC "$sdk_dir/cuda/11.4/bin/nvcc" echo "Installed CUDA versions:" ls "$sdk_dir/cuda" diff --git a/ci-test-compile.sh b/ci-test-compile.sh index e56931c..6544731 100755 --- a/ci-test-compile.sh +++ b/ci-test-compile.sh @@ -171,10 +171,10 @@ build_gcc() { # -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ # -DCUDA_ARCH=$NV_ARCH" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=OMP" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=TBB" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include/thrust -DTHRUST_IMPL=CUDA -DBACKEND=CPP" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} 
-DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include -DTHRUST_IMPL=CUDA -DBACKEND=OMP" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include -DTHRUST_IMPL=CUDA -DBACKEND=TBB" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include -DTHRUST_IMPL=CUDA -DBACKEND=CPP" } @@ -224,9 +224,11 @@ build_aomp() { } build_hip() { - run_build hip_build "${HIP_CXX:?}" HIP "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}" + local name="hip_build" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CXX_COMPILER=${HIP_CXX:?} -DSDK_DIR=$ROCM_PATH -DTHRUST_IMPL=ROCM" + run_build $name "${HIP_CXX:?}" HIP "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}" + + run_build $name "${GCC_CXX:?}" THRUST "-DCMAKE_CXX_COMPILER=${HIP_CXX:?} -DSDK_DIR=$ROCM_PATH -DTHRUST_IMPL=ROCM" } build_icpx() { From 0d55a7261b54bf87311a0aac50f3702366ed3e33 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Fri, 12 Nov 2021 00:14:07 +0000 Subject: [PATCH 4/6] Fix CI not installing rocThrust Fix CI CUDA flag version --- ci-prepare-bionic.sh | 2 +- ci-test-compile.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci-prepare-bionic.sh b/ci-prepare-bionic.sh index 462d321..6aee569 100755 --- a/ci-prepare-bionic.sh +++ b/ci-prepare-bionic.sh @@ -254,7 +254,7 @@ setup_rocm() { wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add - echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.5 ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list sudo apt-get update -qq - sudo apt-get install -y -qq rocm-dev + sudo apt-get install -y -qq rocm-dev rocthrust-dev export_var ROCM_PATH "/opt/rocm" export_var HIP_CXX "$ROCM_PATH/bin/hipcc" verify_bin_exists 
"$HIP_CXX" diff --git a/ci-test-compile.sh b/ci-test-compile.sh index 6544731..f72a76b 100755 --- a/ci-test-compile.sh +++ b/ci-test-compile.sh @@ -124,7 +124,7 @@ run_build() { AMD_ARCH="gfx_903" NV_ARCH="sm_70" -NV_ARCH_CCXY="cuda11.3,cc80" +NV_ARCH_CCXY="cuda11.4,cc80" build_gcc() { local name="gcc_build" From fe4007b4460953b4bafdfb825e1a24e9807c756b Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Fri, 12 Nov 2021 02:26:31 +0000 Subject: [PATCH 5/6] Fix CI ROCm quirks Fix CI CUDA path --- ci-prepare-bionic.sh | 1 + ci-test-compile.sh | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ci-prepare-bionic.sh b/ci-prepare-bionic.sh index 6aee569..7294905 100755 --- a/ci-prepare-bionic.sh +++ b/ci-prepare-bionic.sh @@ -256,6 +256,7 @@ setup_rocm() { sudo apt-get update -qq sudo apt-get install -y -qq rocm-dev rocthrust-dev export_var ROCM_PATH "/opt/rocm" + export_var PATH "$ROCM_PATH/bin:$PATH" # ROCm needs this for many of their libraries' CMake build to work export_var HIP_CXX "$ROCM_PATH/bin/hipcc" verify_bin_exists "$HIP_CXX" "$HIP_CXX" --version diff --git a/ci-test-compile.sh b/ci-test-compile.sh index f72a76b..e0bec2f 100755 --- a/ci-test-compile.sh +++ b/ci-test-compile.sh @@ -171,10 +171,10 @@ build_gcc() { # -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ # -DCUDA_ARCH=$NV_ARCH" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include -DTHRUST_IMPL=CUDA -DBACKEND=OMP" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_SDK_DIR/cuda/include -DTHRUST_IMPL=CUDA -DBACKEND=TBB" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH 
-DSDK_DIR=$NVHPC_SDK_DIR/cuda/include -DTHRUST_IMPL=CUDA -DBACKEND=CPP" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=OMP" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=TBB" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CPP" } From dc42388df311c4aff08e97834a607143f95757b2 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Fri, 12 Nov 2021 03:25:18 +0000 Subject: [PATCH 6/6] Fix CXX recognition issues for rocThrust Fix CI check for min CMake version on CUDA Thrust Temporarily disable CUDA Thrust w/ TBB for now --- THRUST.cmake | 4 ++++ ci-test-compile.sh | 29 +++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/THRUST.cmake b/THRUST.cmake index 8aaef26..0c286c2 100644 --- a/THRUST.cmake +++ b/THRUST.cmake @@ -75,6 +75,10 @@ macro(setup) find_package(rocprim REQUIRED CONFIG) find_package(rocthrust REQUIRED CONFIG) endif () + + # for HIP we treat *.cu files as CXX otherwise CMake doesn't compile them + set_source_files_properties(${IMPL_SOURCES} PROPERTIES LANGUAGE CXX) + register_link_library(roc::rocthrust) else () message(FATAL_ERROR "Unsupported THRUST_IMPL provided: ${THRUST_IMPL}") diff --git a/ci-test-compile.sh b/ci-test-compile.sh index e0bec2f..a61834c 100755 --- a/ci-test-compile.sh +++ b/ci-test-compile.sh @@ -57,7 +57,7 @@ run_build() { local cmake_code=$? 
"$CMAKE_BIN" --build "$build" -j "$(nproc)" &>>"$log" - "$CMAKE_BIN" --build "$build" --target install -j "$(nproc)" &>>"$log" + "$CMAKE_BIN" --build "$build" --target install -j "$(nproc)" &>>"$log" local cmake_code=$? set -e @@ -171,10 +171,27 @@ build_gcc() { # -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ # -DCUDA_ARCH=$NV_ARCH" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=OMP" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=TBB" - run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CPP" + + # CMake >= 3.15 only due to Nvidia's Thrust CMake requirements + local current=$("$CMAKE_BIN" --version | head -n 1 | cut -d ' ' -f3) + local required="3.15.0" + if [ "$(printf '%s\n' "$required" "$current" | sort -V | head -n1)" = "$required" ]; then + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=OMP" + run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CPP" + + # FIXME CUDA Thrust + TBB throws the following error: + # /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512fintrin.h(9146): error: identifier "__builtin_ia32_rndscaless_round" is undefined + # 
/usr/lib/gcc/x86_64-linux-gnu/9/include/avx512fintrin.h(9155): error: identifier "__builtin_ia32_rndscalesd_round" is undefined + # /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512fintrin.h(14797): error: identifier "__builtin_ia32_rndscaless_round" is undefined + # /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512fintrin.h(14806): error: identifier "__builtin_ia32_rndscalesd_round" is undefined + # /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512dqintrin.h(1365): error: identifier "__builtin_ia32_fpclassss" is undefined + # /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512dqintrin.h(1372): error: identifier "__builtin_ia32_fpclasssd" is undefined + + # run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=TBB" + else + echo "CMake version ${current} < ${required}, skipping Thrust models" + fi } @@ -198,7 +215,7 @@ build_clang() { run_build $name "${CLANG_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" run_build $name "${CLANG_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # run_build $name "${LANG_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported - + run_build $name "${CLANG_CXX:?}" TBB "$cxx -DONE_TBB_DIR=$TBB_LIB" run_build $name "${CLANG_CXX:?}" TBB "$cxx" # build TBB again with the system TBB