Fix up CI, resolves #145, supersedes #154

Drop vector for std-* models
This commit is contained in:
Tom Lin 2023-09-24 21:11:35 +01:00
parent 3dcafd1af1
commit 72be9f6980
15 changed files with 278 additions and 250 deletions

View File

@ -12,7 +12,7 @@ on:
jobs: jobs:
test-rust: test-rust:
runs-on: ubuntu-18.04 runs-on: ubuntu-22.04
defaults: defaults:
run: run:
working-directory: ./src/rust/rust-stream working-directory: ./src/rust/rust-stream
@ -28,7 +28,7 @@ jobs:
run: ./target/release/rust-stream --arraysize 2048 run: ./target/release/rust-stream --arraysize 2048
test-java: test-java:
runs-on: ubuntu-18.04 runs-on: ubuntu-22.04
defaults: defaults:
run: run:
working-directory: ./src/java/java-stream working-directory: ./src/java/java-stream
@ -41,7 +41,7 @@ jobs:
run: java -jar target/java-stream.jar --arraysize 2048 run: java -jar target/java-stream.jar --arraysize 2048
test-julia: test-julia:
runs-on: ubuntu-18.04 runs-on: ubuntu-22.04
defaults: defaults:
run: run:
working-directory: ./src/julia/JuliaStream.jl working-directory: ./src/julia/JuliaStream.jl
@ -69,8 +69,24 @@ jobs:
run: julia --project src/AMDGPUStream.jl --list run: julia --project src/AMDGPUStream.jl --list
# Prepares the C++ toolchains once and stores them in the Actions cache under
# ./compilers, keyed on the OS and the contents of the preparation script.
setup-cpp:
  runs-on: ubuntu-22.04
  steps:
    # The repository must be checked out before anything else: both the cache
    # key (hashFiles) and the prepare step read ./src/ci-prepare-bionic.sh.
    # Without a checkout the hash is empty and the `source` below fails, which
    # the trailing `|| true` would silently hide.
    - uses: actions/checkout@v2
    - name: Cache compiler
      # if: ${{ !env.ACT }}
      id: prepare-compilers
      uses: actions/cache@v2
      with:
        path: ./compilers
        key: ${{ runner.os }}-${{ hashFiles('./src/ci-prepare-bionic.sh') }}
    # Only run the preparation script when the cache lookup missed.
    - name: Prepare compilers
      if: steps.prepare-compilers.outputs.cache-hit != 'true'
      run: source ./src/ci-prepare-bionic.sh ./compilers SETUP false || true
test-cpp: test-cpp:
runs-on: ubuntu-18.04 needs: setup-cpp
runs-on: ubuntu-22.04
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
@ -84,15 +100,15 @@ jobs:
- name: Prepare compilers - name: Prepare compilers
if: steps.prepare-compilers.outputs.cache-hit != 'true' if: steps.prepare-compilers.outputs.cache-hit != 'true'
run: source ./src/ci-prepare-bionic.sh ./compilers SETUP true || true run: source ./src/ci-prepare-bionic.sh ./compilers SETUP false || true
- name: Setup test environment - name: Setup test environment
run: source ./src/ci-prepare-bionic.sh ./compilers VARS false || true run: source ./src/ci-prepare-bionic.sh ./compilers VARS false || true
# Enable tmate debugging of manually-triggered workflows if the input option was provided # Enable tmate debugging of manually-triggered workflows if the input option was provided
- name: Setup tmate session # - name: Setup tmate session
uses: mxschmitt/action-tmate@v3 # uses: mxschmitt/action-tmate@v3
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled }} # if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled }}
- name: Test compile gcc @ CMake 3.13 - name: Test compile gcc @ CMake 3.13
if: ${{ ! cancelled() }} if: ${{ ! cancelled() }}
@ -167,4 +183,54 @@ jobs:
run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_18_BIN }} run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_18_BIN }}
- name: Test compile hipsycl @ CMake 3.18 - name: Test compile hipsycl @ CMake 3.18
if: ${{ ! cancelled() }} if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }} run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }}
- name: Test compile gcc @ CMake 3.20
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_20_BIN }}
- name: Test compile clang @ CMake 3.20
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_20_BIN }}
- name: Test compile nvhpc @ CMake 3.20
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_20_BIN }}
- name: Test compile aocc @ CMake 3.20
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_20_BIN }}
- name: Test compile aomp @ CMake 3.20
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_20_BIN }}
- name: Test compile hip @ CMake 3.20
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_20_BIN }}
- name: Test compile dpcpp @ CMake 3.20
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_20_BIN }}
- name: Test compile hipsycl @ CMake 3.20
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_20_BIN }}
- name: Test compile gcc @ CMake 3.24
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_24_BIN }}
- name: Test compile clang @ CMake 3.24
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_24_BIN }}
- name: Test compile nvhpc @ CMake 3.24
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_24_BIN }}
- name: Test compile aocc @ CMake 3.24
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_24_BIN }}
- name: Test compile aomp @ CMake 3.24
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_24_BIN }}
- name: Test compile hip @ CMake 3.24
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_24_BIN }}
- name: Test compile dpcpp @ CMake 3.24
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_24_BIN }}
- name: Test compile hipsycl @ CMake 3.24
if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_24_BIN }}

View File

@ -1,5 +1,9 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR) cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
# Opt in to the NEW behaviour of policy CMP0135 whenever the running CMake
# knows about it; older CMake releases skip this block entirely.
if (POLICY CMP0135)
    cmake_policy(SET CMP0135 NEW)
endif ()
project(BabelStream VERSION 4.0 LANGUAGES CXX) project(BabelStream VERSION 4.0 LANGUAGES CXX)
# uncomment for debugging build issues: # uncomment for debugging build issues:
@ -71,15 +75,19 @@ hint_flag(CXX_EXTRA_LINKER_FLAGS "
# Honor user's CXX_EXTRA_LINK_FLAGS # Honor user's CXX_EXTRA_LINK_FLAGS
set(CXX_EXTRA_LINK_FLAGS ${CXX_EXTRA_FLAGS} ${CXX_EXTRA_LINK_FLAGS}) set(CXX_EXTRA_LINK_FLAGS ${CXX_EXTRA_FLAGS} ${CXX_EXTRA_LINK_FLAGS})
option(USE_TBB "Enable oneTBB library for *supported* models. Enabling this on models that option(USE_TBB "Enable the oneTBB library for *supported* models. Enabling this on models that
don't explicitly link against TBB is a no-op, see description of your selected don't explicitly link against TBB is a no-op, see description of your selected
model on how this is used." OFF) model on how this is used." OFF)
if (USE_TBB) option(FETCH_TBB "Fetch (download) the oneTBB library for *supported* models. This uses CMake's
FetchContent feature. Specify version by setting FETCH_TBB_VERSION" OFF)
set(FETCH_TBB_VERSION "v2021.10.0" CACHE STRING "Specify version of oneTBB to use if FETCH_TBB is ON")
if (FETCH_TBB)
FetchContent_Declare( FetchContent_Declare(
TBB TBB
GIT_REPOSITORY https://github.com/oneapi-src/oneTBB.git GIT_REPOSITORY https://github.com/oneapi-src/oneTBB.git
GIT_TAG v2021.9.0 GIT_TAG "${FETCH_TBB_VERSION}"
) )
# Don't fail builds on warning (TBB has -Wall while not being free of warnings from unused symbols...) # Don't fail builds on warning (TBB has -Wall while not being free of warnings from unused symbols...)
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
@ -92,15 +100,19 @@ if (USE_TBB)
endif () endif ()
endif () endif ()
option(USE_ONEDPL "Enable oneDPL library for *supported* models. Enabling this on models that option(USE_ONEDPL "Enable the oneDPL library for *supported* models. Enabling this on models that
don't explicitly link against DPL is a no-op, see description of your selected don't explicitly link against DPL is a no-op, see description of your selected
model on how this is used." OFF) model on how this is used." OFF)
if (USE_ONEDPL) option(FETCH_ONEDPL "Fetch (download) the oneDPL library for *supported* models. This uses CMake's
FetchContent feature. Specify version by setting FETCH_ONEDPL_VERSION" OFF)
# Git tag of oneDPL passed to FetchContent (as GIT_TAG) when FETCH_ONEDPL is ON.
set(FETCH_ONEDPL_VERSION "oneDPL-2022.2.0-rc1" CACHE STRING "Specify version of oneDPL to use if FETCH_ONEDPL is ON")
if (FETCH_ONEDPL)
FetchContent_Declare( FetchContent_Declare(
oneDPL oneDPL
GIT_REPOSITORY https://github.com/oneapi-src/oneDPL.git GIT_REPOSITORY https://github.com/oneapi-src/oneDPL.git
GIT_TAG oneDPL-2022.2.0-rc1 GIT_TAG "${FETCH_ONEDPL_VERSION}"
) )
string(TOLOWER ${USE_ONEDPL} ONEDPL_BACKEND) string(TOLOWER ${USE_ONEDPL} ONEDPL_BACKEND)
# XXX oneDPL looks for omp instead of openmp, which mismatches(!) with ONEDPL_PAR_BACKEND if using find_package # XXX oneDPL looks for omp instead of openmp, which mismatches(!) with ONEDPL_PAR_BACKEND if using find_package

View File

@ -83,6 +83,8 @@ get() {
if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then
echo "$name not found, downloading..." echo "$name not found, downloading..."
wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name" wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name"
else
echo "$name found, skipping download..."
fi fi
fi fi
} }
@ -92,13 +94,15 @@ get_and_untar() {
local pkg_url="$2" local pkg_url="$2"
if [ "$SETUP" = true ]; then if [ "$SETUP" = true ]; then
if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then
echo "$name not found, downloading..." echo "$name not found, downloading ($pkg_url)..."
wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name" wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name"
fi fi
echo "Preparing to extract $name ..." echo "Preparing to extract $name ..."
tar -xf "$name" tar -xf "$name"
echo "$name extracted, deleting archive ..." echo "$name extracted, deleting archive ..."
rm -f "$name" # delete for space rm -f "$name" # delete for space
else
echo "Skipping setup for $name ($pkg_url)..."
fi fi
} }
@ -119,10 +123,10 @@ verify_dir_exists() {
setup_aocc() { setup_aocc() {
echo "Preparing AOCC" echo "Preparing AOCC"
local aocc_ver="2.3.0" local aocc_ver="4.0.0"
local tarball="aocc-$aocc_ver.tar.xz" local tarball="aocc-$aocc_ver.tar.xz"
# XXX it's actually XZ compressed, so it should be tar.xz # XXX it's actually XZ compressed, so it should be tar.xz
local AOCC_URL="http://developer.amd.com/wordpress/media/files/aocc-compiler-2.3.0.tar" local AOCC_URL="https://download.amd.com/developer/eula/aocc-compiler/aocc-compiler-${aocc_ver}.tar"
# local AOCC_URL="http://localhost:8000/aocc-compiler-2.3.0.tar" # local AOCC_URL="http://localhost:8000/aocc-compiler-2.3.0.tar"
get_and_untar "$tarball" "$AOCC_URL" get_and_untar "$tarball" "$AOCC_URL"
@ -133,10 +137,10 @@ setup_aocc() {
} }
setup_nvhpc() { setup_nvhpc() {
echo "Preparing Nvidia HPC SDK" echo "Preparing Nvidia HPC SDK"
local nvhpc_ver="22.3" local nvhpc_ver="23.1" # TODO FIXME > 23.1 has a bug with -A
local nvhpc_release="2022_223" local nvhpc_release="2023_231"
local cuda_ver="11.6" local cuda_ver="12.0"
local tarball="nvhpc_$nvhpc_ver.tar.gz" local tarball="nvhpc_$nvhpc_ver.tar.gz"
@ -145,7 +149,7 @@ setup_nvhpc() {
local sdk_dir="$PWD/nvhpc_${nvhpc_release}_Linux_x86_64_cuda_$cuda_ver/install_components/Linux_x86_64/$nvhpc_ver" local sdk_dir="$PWD/nvhpc_${nvhpc_release}_Linux_x86_64_cuda_$cuda_ver/install_components/Linux_x86_64/$nvhpc_ver"
local bin_dir="$sdk_dir/compilers/bin" local bin_dir="$sdk_dir/compilers/bin"
"$bin_dir/makelocalrc" "$bin_dir" -x "$bin_dir/makelocalrc" -d "$bin_dir" -x -gpp g++-12 -gcc gcc-12 -g77 gfortran-12
export_var NVHPC_SDK_DIR "$sdk_dir" export_var NVHPC_SDK_DIR "$sdk_dir"
export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/$cuda_ver" export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/$cuda_ver"
@ -166,7 +170,8 @@ setup_nvhpc() {
setup_aomp() { setup_aomp() {
echo "Preparing AOMP" echo "Preparing AOMP"
local AOMP_URL="https://github.com/ROCm-Developer-Tools/aomp/releases/download/rel_11.12-0/aomp_Ubuntu1804_11.12-0_amd64.deb" local aomp_ver="18.0-0"
local AOMP_URL="https://github.com/ROCm-Developer-Tools/aomp/releases/download/rel_${aomp_ver}/aomp_Ubuntu2204_${aomp_ver}_amd64.deb"
# local AOMP_URL="http://0.0.0.0:8000/aomp_Ubuntu1804_11.12-0_amd64.deb" # local AOMP_URL="http://0.0.0.0:8000/aomp_Ubuntu1804_11.12-0_amd64.deb"
get_and_install_deb "aomp" "aomp" "$AOMP_URL" get_and_install_deb "aomp" "aomp" "$AOMP_URL"
@ -189,9 +194,10 @@ setup_oclcpu() {
setup_kokkos() { setup_kokkos() {
echo "Preparing Kokkos" echo "Preparing Kokkos"
local kokkos_ver="3.3.01" local kokkos_ver="4.1.00"
local tarball="kokkos-$kokkos_ver.tar.gz" local tarball="kokkos-$kokkos_ver.tar.gz"
local url="https://github.com/kokkos/kokkos/archive/$kokkos_ver.tar.gz" local url="https://github.com/kokkos/kokkos/archive/$kokkos_ver.tar.gz"
# local url="http://localhost:8000/$kokkos_ver.tar.gz" # local url="http://localhost:8000/$kokkos_ver.tar.gz"
@ -203,10 +209,10 @@ setup_kokkos() {
setup_raja() { setup_raja() {
echo "Preparing RAJA" echo "Preparing RAJA"
local raja_ver="0.13.0" local raja_ver="2023.06.1"
local tarball="raja-$raja_ver.tar.gz" local tarball="raja-$raja_ver.tar.gz"
local url="https://github.com/LLNL/RAJA/releases/download/v0.13.0/RAJA-v$raja_ver.tar.gz" local url="https://github.com/LLNL/RAJA/releases/download/v$raja_ver/RAJA-v$raja_ver.tar.gz"
# local url="http://localhost:8000/RAJA-v$raja_ver.tar.gz" # local url="http://localhost:8000/RAJA-v$raja_ver.tar.gz"
get_and_untar "$tarball" "$url" get_and_untar "$tarball" "$url"
@ -217,7 +223,7 @@ setup_raja() {
setup_tbb() { setup_tbb() {
echo "Preparing TBB" echo "Preparing TBB"
local tbb_ver="2021.2.0" local tbb_ver="2021.9.0"
local tarball="oneapi-tbb-$tbb_ver-lin.tgz" local tarball="oneapi-tbb-$tbb_ver-lin.tgz"
local url="https://github.com/oneapi-src/oneTBB/releases/download/v$tbb_ver/oneapi-tbb-$tbb_ver-lin.tgz" local url="https://github.com/oneapi-src/oneTBB/releases/download/v$tbb_ver/oneapi-tbb-$tbb_ver-lin.tgz"
@ -231,9 +237,9 @@ setup_tbb() {
setup_clang_gcc() { setup_clang_gcc() {
sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10 clang libomp-dev sudo apt-get install -y -qq gcc-12-offload-nvptx gcc-12-offload-amdgcn libtbb2 libtbb-dev g++-12 clang libomp-dev libc6
export_var GCC_CXX "$(which g++-10)" export_var GCC_CXX "$(which g++-12)"
verify_bin_exists "$GCC_CXX" verify_bin_exists "$GCC_CXX"
"$GCC_CXX" --version "$GCC_CXX" --version
@ -254,7 +260,7 @@ setup_clang_gcc() {
} }
setup_rocm() { setup_rocm() {
sudo apt-get install -y -qq rocm-dev rocthrust-dev sudo apt-get install -y rocm-dev rocthrust-dev
export_var ROCM_PATH "/opt/rocm" export_var ROCM_PATH "/opt/rocm"
export_var PATH "$ROCM_PATH/bin:$PATH" # ROCm needs this for many of their libraries' CMake build to work export_var PATH "$ROCM_PATH/bin:$PATH" # ROCm needs this for many of their libraries' CMake build to work
export_var HIP_CXX "$ROCM_PATH/bin/hipcc" export_var HIP_CXX "$ROCM_PATH/bin/hipcc"
@ -265,7 +271,7 @@ setup_rocm() {
setup_dpcpp() { setup_dpcpp() {
local nightly="20210106" local nightly="20230615"
local tarball="dpcpp-$nightly.tar.gz" local tarball="dpcpp-$nightly.tar.gz"
local url="https://github.com/intel/llvm/releases/download/sycl-nightly/$nightly/dpcpp-compiler.tar.gz" local url="https://github.com/intel/llvm/releases/download/sycl-nightly/$nightly/dpcpp-compiler.tar.gz"
@ -282,22 +288,22 @@ setup_dpcpp() {
setup_hipsycl() { setup_hipsycl() {
sudo apt-get install -y -qq libboost-fiber-dev libboost-context-dev sudo apt-get install -y -qq libboost-fiber-dev libboost-context-dev
local hipsycl_ver="0.9.0" local hipsycl_ver="0.9.1"
local tarball="v$hipsycl_ver.tar.gz" local tarball="v$hipsycl_ver.tar.gz"
local install_dir="$PWD/hipsycl_dist_$hipsycl_ver" local install_dir="$PWD/hipsycl_dist_$hipsycl_ver"
local url="https://github.com/illuhad/hipSYCL/archive/v$hipsycl_ver.tar.gz" local url="https://github.com/AdaptiveCpp/AdaptiveCpp/archive/v$hipsycl_ver.tar.gz"
# local url="http://localhost:8000/hipSYCL-$hipsycl_ver.tar.gz" # local url="http://localhost:8000/AdaptiveCpp-$hipsycl_ver.tar.gz"
get_and_untar "$tarball" "$url" get_and_untar "$tarball" "$url"
if [ "$SETUP" = true ]; then if [ "$SETUP" = true ]; then
local src="$PWD/hipSYCL-$hipsycl_ver" local src="$PWD/AdaptiveCpp-$hipsycl_ver"
rm -rf "$src/build" rm -rf "$src/build"
rm -rf "$install_dir" rm -rf "$install_dir"
cmake "-B$src/build" "-H$src" \ cmake "-B$src/build" "-H$src" \
-DCMAKE_C_COMPILER="$(which gcc-10)" \ -DCMAKE_C_COMPILER="$(which gcc-12)" \
-DCMAKE_CXX_COMPILER="$(which g++-10)" \ -DCMAKE_CXX_COMPILER="$(which g++-12)" \
-DCMAKE_INSTALL_PREFIX="$install_dir" \ -DCMAKE_INSTALL_PREFIX="$install_dir" \
-DWITH_ROCM_BACKEND=OFF \ -DWITH_ROCM_BACKEND=OFF \
-DWITH_CUDA_BACKEND=OFF \ -DWITH_CUDA_BACKEND=OFF \
@ -312,25 +318,20 @@ setup_hipsycl() {
check_size check_size
} }
setup_computecpp() {
echo "TODO ComputeCpp requires registration+login to download"
}
if [ "${GITHUB_ACTIONS:-false}" = true ]; then if [ "${GITHUB_ACTIONS:-false}" = true ]; then
echo "Running in GitHub Actions, defaulting to special export" echo "Running in GitHub Actions, defaulting to special export"
TERM=xterm TERM=xterm
export TERM=xterm export TERM=xterm
# drop the lock in case we got one from a failed run # drop the lock in case we got one from a failed run
rm /var/lib/dpkg/lock-frontend || true rm -rf /var/lib/dpkg/lock-frontend || true
rm /var/cache/apt/archives/lock || true rm -rf /var/cache/apt/archives/lock || true
wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add -
echo "deb http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list
echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.5 ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list
mkdir --parents --mode=0755 /etc/apt/keyrings
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/5.7 jammy main' | sudo tee /etc/apt/sources.list.d/rocm.list
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt-get update -qq sudo apt-get update -qq
sudo apt-get install -y -qq cmake sudo apt-get install cmake gcc g++ libelf-dev libdrm-amdgpu1 libnuma-dev
if [ "$SETUP" = true ]; then if [ "$SETUP" = true ]; then
echo "Deleting extra packages for space in 2 seconds..." echo "Deleting extra packages for space in 2 seconds..."
@ -340,6 +341,7 @@ if [ "${GITHUB_ACTIONS:-false}" = true ]; then
sudo apt-get autoremove -y sudo apt-get autoremove -y
check_size check_size
fi fi
sudo apt-get upgrade -qq
else else
echo "Running locally, defaulting to standard export" echo "Running locally, defaulting to standard export"
fi fi
@ -368,6 +370,18 @@ setup_cmake() {
verify_bin_exists "$CMAKE_3_18_BIN" verify_bin_exists "$CMAKE_3_18_BIN"
"$CMAKE_3_18_BIN" --version "$CMAKE_3_18_BIN" --version
get "cmake-3.20.sh" "$cmake_release/v3.20.4/cmake-3.20.4-linux-x86_64.sh"
chmod +x "./cmake-3.20.sh" && "./cmake-3.20.sh" --skip-license --include-subdir
export_var CMAKE_3_20_BIN "$PWD/cmake-3.20.4-linux-x86_64/bin/cmake"
verify_bin_exists "$CMAKE_3_20_BIN"
"$CMAKE_3_20_BIN" --version
get "cmake-3.24.sh" "$cmake_release/v3.24.4/cmake-3.24.4-linux-x86_64.sh"
chmod +x "./cmake-3.24.sh" && "./cmake-3.24.sh" --skip-license --include-subdir
export_var CMAKE_3_24_BIN "$PWD/cmake-3.24.4-linux-x86_64/bin/cmake"
verify_bin_exists "$CMAKE_3_24_BIN"
"$CMAKE_3_24_BIN" --version
check_size check_size
} }
@ -385,6 +399,10 @@ if [ "$PARALLEL" = true ]; then
setup_tbb & setup_tbb &
wait wait
else else
# these need apt
setup_clang_gcc
setup_rocm
setup_hipsycl
setup_cmake setup_cmake
setup_aocc setup_aocc
setup_oclcpu setup_oclcpu
@ -394,10 +412,6 @@ else
setup_kokkos setup_kokkos
setup_raja setup_raja
setup_tbb setup_tbb
# these need apt
setup_clang_gcc
setup_rocm
setup_hipsycl
fi fi
echo "Done!" echo "Done!"

View File

@ -120,10 +120,21 @@ run_build() {
# CLANG_OMP_OFFLOAD_NVIDIA=false # CLANG_OMP_OFFLOAD_NVIDIA=false
### ###
NV_ARCH_CC="70"
AMD_ARCH="gfx_903" AMD_ARCH="gfx_903"
NV_ARCH="sm_70" NV_ARCH="sm_${NV_ARCH_CC}"
NV_ARCH_CCXY="cuda${NVHPC_CUDA_VER:?},cc80" NV_ARCH_CCXY="cuda${NVHPC_CUDA_VER:?},cc80"
# Checks whether the CMake binary named by the global $CMAKE_BIN is at least
# the requested version.
#   $1 - minimum required version string, e.g. "3.20.0"
# Returns 0 when the detected version is >= $1, and 1 otherwise (including
# when no version could be read from "$CMAKE_BIN --version").
check_cmake_ver() {
  local required="$1"
  # Declared separately from the assignment so that `local` does not mask the
  # exit status of the command substitution.
  local current
  # Take the third space-separated field of the first line of the output.
  current=$("$CMAKE_BIN" --version | head -n 1 | cut -d ' ' -f3)
  if [ -z "$current" ]; then
    echo "Unable to determine CMake version from '$CMAKE_BIN'" >&2
    return 1
  fi
  # `sort -V` orders the pair as version numbers; if the smaller of the two is
  # the required version, the current version satisfies the requirement.
  [ "$(printf '%s\n' "$required" "$current" | sort -V | head -n1)" = "$required" ]
}
build_gcc() { build_gcc() {
local name="gcc_build" local name="gcc_build"
local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}" local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}"
@ -138,14 +149,12 @@ build_gcc() {
for use_onedpl in OFF OPENMP TBB; do for use_onedpl in OFF OPENMP TBB; do
case "$use_onedpl" in case "$use_onedpl" in
OFF) dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" ;; OFF) dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" ;;
*) dpl_conditional_flags="-DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;; *) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;;
esac esac
for use_vector in OFF ON; do # some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here
# some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here run_build $name "${GCC_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
run_build $name "${GCC_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector" run_build $name "${GCC_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
run_build $name "${GCC_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector" run_build $name "${GCC_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
run_build $name "${GCC_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector"
done
done done
run_build $name "${GCC_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB" run_build $name "${GCC_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
@ -153,40 +162,45 @@ build_gcc() {
run_build $name "${GCC_CXX:?}" tbb "$cxx -DUSE_VECTOR=ON" # build with vectors run_build $name "${GCC_CXX:?}" tbb "$cxx -DUSE_VECTOR=ON" # build with vectors
if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then
run_build "amd_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa" run_build "amd_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa;-fno-stack-protector;-fcf-protection=none"
run_build "amd_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=AMD:$AMD_ARCH" run_build "amd_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=AMD:$AMD_ARCH"
fi fi
if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then
run_build "nvidia_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none" run_build "nvidia_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none;-fno-stack-protector;-fcf-protection=none"
run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
fi fi
run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
# run_build $name "${CC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON" if check_cmake_ver "3.16.0"; then
run_build "cuda_$name" "${GCC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" # run_build $name "${CC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON"
run_build "cuda_$name" "${GCC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
else
echo "Skipping Kokkos models due to CMake version requirement"
fi
run_build $name "${GCC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" run_build $name "${GCC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
run_build $name "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" if check_cmake_ver "3.20.0"; then
run_build $name "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} -DENABLE_OPENMP=ON"
else
echo "Skipping RAJA models due to CMake version requirement"
fi
# FIXME fails due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100102 if check_cmake_ver "3.20.0"; then
# FIXME we also got https://github.com/NVIDIA/nccl/issues/494 run_build "cuda_$name" "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \
-DENABLE_CUDA=ON \
-DTARGET=NVIDIA \
-DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \
-DCUDA_ARCH=$NV_ARCH"
else
echo "Skipping RAJA models due to CMake version requirement"
fi
# run_build "cuda_$name" "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \ if check_cmake_ver "3.18.0"; then # CMake >= 3.15 only due to Nvidia's Thrust CMake requirements
# -DENABLE_CUDA=ON \ run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH_CC -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=CUDA"
# -DTARGET=NVIDIA \ # run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=OMP" # FIXME
# -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH_CC -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=CPP"
# -DCUDA_ARCH=$NV_ARCH"
# CMake >= 3.15 only due to Nvidia's Thrust CMake requirements
local current=$("$CMAKE_BIN" --version | head -n 1 | cut -d ' ' -f3)
local required="3.15.0"
if [ "$(printf '%s\n' "$required" "$current" | sort -V | head -n1)" = "$required" ]; then
run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=CUDA"
run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=OMP"
run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=CPP"
# FIXME CUDA Thrust + TBB throws the following error: # FIXME CUDA Thrust + TBB throws the following error:
# /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512fintrin.h(9146): error: identifier "__builtin_ia32_rndscaless_round" is undefined # /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512fintrin.h(9146): error: identifier "__builtin_ia32_rndscaless_round" is undefined
@ -198,7 +212,7 @@ build_gcc() {
# run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=TBB" # run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=TBB"
else else
echo "CMake version ${current} < ${required}, skipping Thrust models" echo "Skipping Thrust models due to CMake version requirement"
fi fi
} }
@ -216,30 +230,39 @@ build_clang() {
run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
fi fi
run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" if check_cmake_ver "3.20.0"; then
run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} -DENABLE_OPENMP=ON"
else
echo "Skipping RAJA models due to CMake version requirement"
fi
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" if check_cmake_ver "3.16.0"; then
run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
else
echo "Skipping Kokkos models due to CMake version requirement"
fi
run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
for use_onedpl in OFF OPENMP TBB; do for use_onedpl in OFF OPENMP TBB; do
for use_vector in OFF ON; do case "$use_onedpl" in
case "$use_onedpl" in OFF) dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" ;;
OFF) dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" ;; *) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;;
*) dpl_conditional_flags="-DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;; esac
esac run_build $name "${CLANG_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
run_build $name "${CLANG_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector " run_build $name "${CLANG_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
run_build $name "${CLANG_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector" # run_build $name "${CLANG_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" # not yet supported
# run_build $name "${CLANG_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector" # not yet supported
done
done done
run_build $name "${CLANG_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB" run_build $name "${CLANG_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
run_build $name "${CLANG_CXX:?}" tbb "$cxx" # build TBB again with the system TBB run_build $name "${CLANG_CXX:?}" tbb "$cxx" # build TBB again with the system TBB
run_build $name "${CLANG_CXX:?}" tbb "$cxx -DUSE_VECTOR=ON" # build with vectors run_build $name "${CLANG_CXX:?}" tbb "$cxx -DUSE_VECTOR=ON" # build with vectors
if check_cmake_ver "3.20.0"; then
run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} -DENABLE_OPENMP=ON"
else
echo "Skipping RAJA models due to CMake version requirement"
fi
# no clang /w RAJA+cuda because it needs nvcc which needs gcc # no clang /w RAJA+cuda because it needs nvcc which needs gcc
} }
@ -249,10 +272,6 @@ build_nvhpc() {
run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
# std again but with vectors
run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DUSE_VECTOR=ON"
run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DUSE_VECTOR=ON"
run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY"
run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen" run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen"
} }
@ -291,15 +310,18 @@ build_icpc() {
local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}" local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}"
run_build $name "${ICPC_CXX:?}" omp "$cxx" run_build $name "${ICPC_CXX:?}" omp "$cxx"
run_build $name "${ICPC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" run_build $name "${ICPC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
run_build $name "${ICPC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" if check_cmake_ver "3.20.0"; then
run_build $name "${ICPC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" run_build $name "${ICPC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} -DENABLE_OPENMP=ON"
} else
echo "Skipping RAJA models due to CMake version requirement"
fi
if check_cmake_ver "3.16.0"; then
run_build $name "${ICPC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
else
echo "Skipping Kokkos models due to CMake version requirement"
fi
build_computecpp() {
run_build computecpp_build "compute++" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \
-DSYCL_COMPILER=COMPUTECPP \
-DSYCL_COMPILER_DIR=${COMPUTECPP_DIR:?} \
-DOpenCL_LIBRARY=${OCL_LIB:?}"
} }
build_dpcpp() { build_dpcpp() {

View File

@ -8,8 +8,6 @@ register_flag_optional(RAJA_IN_TREE
Make sure to use the release version of RAJA or clone RAJA recursively with submodules. Make sure to use the release version of RAJA or clone RAJA recursively with submodules.
Remember to append RAJA specific flags as well, for example: Remember to append RAJA specific flags as well, for example:
-DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ... -DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ...
For RAJA >= v2022.03.0, remember to use the RAJA prefixed CMake options:
-DRAJA_IN_TREE=... -DRAJA_ENABLE_OPENMP=ON -DRAJA_ENABLE_CUDA=ON ...
See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options
" "") " "")

View File

@ -6,22 +6,10 @@
#include "STDDataStream.h" #include "STDDataStream.h"
#ifdef USE_VECTOR
#define BEGIN(x) (x).begin()
#define END(x) (x).end()
#else
#define BEGIN(x) (x)
#define END(x) ((x) + array_size)
#endif
template <class T> template <class T>
STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device) STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device)
noexcept : array_size{ARRAY_SIZE}, noexcept : array_size{ARRAY_SIZE},
#ifdef USE_VECTOR
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
#else
a(alloc_raw<T>(ARRAY_SIZE)), b(alloc_raw<T>(ARRAY_SIZE)), c(alloc_raw<T>(ARRAY_SIZE)) a(alloc_raw<T>(ARRAY_SIZE)), b(alloc_raw<T>(ARRAY_SIZE)), c(alloc_raw<T>(ARRAY_SIZE))
#endif
{ {
std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl;
#ifdef USE_ONEDPL #ifdef USE_ONEDPL
@ -41,55 +29,53 @@ STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device)
template<class T> template<class T>
STDDataStream<T>::~STDDataStream() { STDDataStream<T>::~STDDataStream() {
#ifndef USE_VECTOR dealloc_raw(a);
dealloc_raw(a); dealloc_raw(b);
dealloc_raw(b); dealloc_raw(c);
dealloc_raw(c);
#endif
} }
template <class T> template <class T>
void STDDataStream<T>::init_arrays(T initA, T initB, T initC) void STDDataStream<T>::init_arrays(T initA, T initB, T initC)
{ {
std::fill(exe_policy, BEGIN(a), END(a), initA); std::fill(exe_policy, a, a + array_size, initA);
std::fill(exe_policy, BEGIN(b), END(b), initB); std::fill(exe_policy, b, b + array_size, initB);
std::fill(exe_policy, BEGIN(c), END(c), initC); std::fill(exe_policy, c, c + array_size, initC);
} }
template <class T> template <class T>
void STDDataStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c) void STDDataStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
{ {
std::copy(BEGIN(a), END(a), h_a.begin()); std::copy(a, a + array_size, h_a.begin());
std::copy(BEGIN(b), END(b), h_b.begin()); std::copy(b, b + array_size, h_b.begin());
std::copy(BEGIN(c), END(c), h_c.begin()); std::copy(c, c + array_size, h_c.begin());
} }
template <class T> template <class T>
void STDDataStream<T>::copy() void STDDataStream<T>::copy()
{ {
// c[i] = a[i] // c[i] = a[i]
std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c)); std::copy(exe_policy, a, a + array_size, c);
} }
template <class T> template <class T>
void STDDataStream<T>::mul() void STDDataStream<T>::mul()
{ {
// b[i] = scalar * c[i]; // b[i] = scalar * c[i];
std::transform(exe_policy, BEGIN(c), END(c), BEGIN(b), [scalar = startScalar](T ci){ return scalar*ci; }); std::transform(exe_policy, c, c + array_size, b, [scalar = startScalar](T ci){ return scalar*ci; });
} }
template <class T> template <class T>
void STDDataStream<T>::add() void STDDataStream<T>::add()
{ {
// c[i] = a[i] + b[i]; // c[i] = a[i] + b[i];
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(c), std::plus<T>()); std::transform(exe_policy, a, a + array_size, b, c, std::plus<T>());
} }
template <class T> template <class T>
void STDDataStream<T>::triad() void STDDataStream<T>::triad()
{ {
// a[i] = b[i] + scalar * c[i]; // a[i] = b[i] + scalar * c[i];
std::transform(exe_policy, BEGIN(b), END(b), BEGIN(c), BEGIN(a), [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; }); std::transform(exe_policy, b, b + array_size, c, a, [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; });
} }
template <class T> template <class T>
@ -99,8 +85,8 @@ void STDDataStream<T>::nstream()
// Need to do in two stages with C++11 STL. // Need to do in two stages with C++11 STL.
// 1: a[i] += b[i] // 1: a[i] += b[i]
// 2: a[i] += scalar * c[i]; // 2: a[i] += scalar * c[i];
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(a), [](T ai, T bi){ return ai + bi; }); std::transform(exe_policy, a, a + array_size, b, a, [](T ai, T bi){ return ai + bi; });
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(c), BEGIN(a), [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; }); std::transform(exe_policy, a, a + array_size, c, a, [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; });
} }
@ -108,7 +94,7 @@ template <class T>
T STDDataStream<T>::dot() T STDDataStream<T>::dot()
{ {
// sum = 0; sum += a[i]*b[i]; return sum; // sum = 0; sum += a[i]*b[i]; return sum;
return std::transform_reduce(exe_policy, BEGIN(a), END(a), BEGIN(b), 0.0); return std::transform_reduce(exe_policy, a, a + array_size, b, 0.0);
} }
void listDevices(void) void listDevices(void)
@ -127,6 +113,3 @@ std::string getDeviceDriver(const int)
} }
template class STDDataStream<float>; template class STDDataStream<float>;
template class STDDataStream<double>; template class STDDataStream<double>;
#undef BEGIN
#undef END

View File

@ -22,12 +22,7 @@ class STDDataStream : public Stream<T>
int array_size; int array_size;
// Device side pointers // Device side pointers
#ifdef USE_VECTOR
std::vector<T> a, b, c;
#else
T *a, *b, *c; T *a, *b, *c;
#endif
public: public:
STDDataStream(const int, int) noexcept; STDDataStream(const int, int) noexcept;

View File

@ -3,10 +3,6 @@ register_flag_optional(CMAKE_CXX_COMPILER
"Any CXX compiler that is supported by CMake detection" "Any CXX compiler that is supported by CMake detection"
"c++") "c++")
register_flag_optional(USE_VECTOR
"Whether to use std::vector<T> for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use."
"OFF")
register_flag_optional(NVHPC_OFFLOAD register_flag_optional(NVHPC_OFFLOAD
"Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK. "Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK.
The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`) The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`)
@ -47,9 +43,6 @@ macro(setup)
register_append_cxx_flags(ANY ${NVHPC_FLAGS}) register_append_cxx_flags(ANY ${NVHPC_FLAGS})
register_append_link_flags(${NVHPC_FLAGS}) register_append_link_flags(${NVHPC_FLAGS})
endif () endif ()
if (USE_VECTOR)
register_definitions(USE_VECTOR)
endif ()
if (USE_TBB) if (USE_TBB)
register_link_library(TBB::tbb) register_link_library(TBB::tbb)
endif () endif ()

View File

@ -10,32 +10,10 @@
#define ALIGNMENT (2*1024*1024) // 2MB #define ALIGNMENT (2*1024*1024) // 2MB
#endif #endif
#ifdef USE_VECTOR
#define BEGIN(x) (x).begin()
#define END(x) (x).end()
#else
#define BEGIN(x) (x)
#define END(x) ((x) + array_size)
#endif
#ifdef USE_VECTOR
#if (defined(__NVCOMPILER) || defined(__NVCOMPILER_LLVM__))
#error "std::vector *is* supported in NVHPC if we capture `this`, however, oneDPL (via SYCL2020) only works correctly with explicit *value* captures."
#endif
#if defined(USE_ONEDPL)
#error "std::vector is unspported: oneDPL (via SYCL2020) only works correctly with explicit *value* captures"
#endif
#endif
template <class T> template <class T>
STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device) STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device)
noexcept : array_size{ARRAY_SIZE}, range(0, array_size), noexcept : array_size{ARRAY_SIZE}, range(0, array_size),
#ifdef USE_VECTOR
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
#else
a(alloc_raw<T>(ARRAY_SIZE)), b(alloc_raw<T>(ARRAY_SIZE)), c(alloc_raw<T>(ARRAY_SIZE)) a(alloc_raw<T>(ARRAY_SIZE)), b(alloc_raw<T>(ARRAY_SIZE)), c(alloc_raw<T>(ARRAY_SIZE))
#endif
{ {
std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl;
#ifdef USE_ONEDPL #ifdef USE_ONEDPL
@ -55,41 +33,39 @@ noexcept : array_size{ARRAY_SIZE}, range(0, array_size),
template<class T> template<class T>
STDIndicesStream<T>::~STDIndicesStream() { STDIndicesStream<T>::~STDIndicesStream() {
#ifndef USE_VECTOR dealloc_raw(a);
dealloc_raw(a); dealloc_raw(b);
dealloc_raw(b); dealloc_raw(c);
dealloc_raw(c);
#endif
} }
template <class T> template <class T>
void STDIndicesStream<T>::init_arrays(T initA, T initB, T initC) void STDIndicesStream<T>::init_arrays(T initA, T initB, T initC)
{ {
std::fill(exe_policy, BEGIN(a), END(a), initA); std::fill(exe_policy, a, a + array_size, initA);
std::fill(exe_policy, BEGIN(b), END(b), initB); std::fill(exe_policy, b, b + array_size, initB);
std::fill(exe_policy, BEGIN(c), END(c), initC); std::fill(exe_policy, c, c + array_size, initC);
} }
template <class T> template <class T>
void STDIndicesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c) void STDIndicesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
{ {
std::copy(BEGIN(a), END(a), h_a.begin()); std::copy(a, a + array_size, h_a.begin());
std::copy(BEGIN(b), END(b), h_b.begin()); std::copy(b, b + array_size, h_b.begin());
std::copy(BEGIN(c), END(c), h_c.begin()); std::copy(c, c + array_size, h_c.begin());
} }
template <class T> template <class T>
void STDIndicesStream<T>::copy() void STDIndicesStream<T>::copy()
{ {
// c[i] = a[i] // c[i] = a[i]
std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c)); std::copy(exe_policy, a, a + array_size, c);
} }
template <class T> template <class T>
void STDIndicesStream<T>::mul() void STDIndicesStream<T>::mul()
{ {
// b[i] = scalar * c[i]; // b[i] = scalar * c[i];
std::transform(exe_policy, range.begin(), range.end(), BEGIN(b), [c = this->c, scalar = startScalar](int i) { std::transform(exe_policy, range.begin(), range.end(), b, [c = this->c, scalar = startScalar](int i) {
return scalar * c[i]; return scalar * c[i];
}); });
} }
@ -98,7 +74,7 @@ template <class T>
void STDIndicesStream<T>::add() void STDIndicesStream<T>::add()
{ {
// c[i] = a[i] + b[i]; // c[i] = a[i] + b[i];
std::transform(exe_policy, range.begin(), range.end(), BEGIN(c), [a = this->a, b = this->b](int i) { std::transform(exe_policy, range.begin(), range.end(), c, [a = this->a, b = this->b](int i) {
return a[i] + b[i]; return a[i] + b[i];
}); });
} }
@ -107,7 +83,7 @@ template <class T>
void STDIndicesStream<T>::triad() void STDIndicesStream<T>::triad()
{ {
// a[i] = b[i] + scalar * c[i]; // a[i] = b[i] + scalar * c[i];
std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [b = this->b, c = this->c, scalar = startScalar](int i) { std::transform(exe_policy, range.begin(), range.end(), a, [b = this->b, c = this->c, scalar = startScalar](int i) {
return b[i] + scalar * c[i]; return b[i] + scalar * c[i];
}); });
} }
@ -119,7 +95,7 @@ void STDIndicesStream<T>::nstream()
// Need to do in two stages with C++11 STL. // Need to do in two stages with C++11 STL.
// 1: a[i] += b[i] // 1: a[i] += b[i]
// 2: a[i] += scalar * c[i]; // 2: a[i] += scalar * c[i];
std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [a = this->a, b = this->b, c = this->c, scalar = startScalar](int i) { std::transform(exe_policy, range.begin(), range.end(), a, [a = this->a, b = this->b, c = this->c, scalar = startScalar](int i) {
return a[i] + b[i] + scalar * c[i]; return a[i] + b[i] + scalar * c[i];
}); });
} }
@ -129,7 +105,7 @@ template <class T>
T STDIndicesStream<T>::dot() T STDIndicesStream<T>::dot()
{ {
// sum = 0; sum += a[i]*b[i]; return sum; // sum = 0; sum += a[i]*b[i]; return sum;
return std::transform_reduce(exe_policy, BEGIN(a), END(a), BEGIN(b), 0.0); return std::transform_reduce(exe_policy, a, a + array_size, b, 0.0);
} }
void listDevices(void) void listDevices(void)
@ -148,6 +124,3 @@ std::string getDeviceDriver(const int)
} }
template class STDIndicesStream<float>; template class STDIndicesStream<float>;
template class STDIndicesStream<double>; template class STDIndicesStream<double>;
#undef BEGIN
#undef END

View File

@ -77,12 +77,7 @@ class STDIndicesStream : public Stream<T>
ranged<int> range; ranged<int> range;
// Device side pointers // Device side pointers
#ifdef USE_VECTOR
std::vector<T> a, b, c;
#else
T *a, *b, *c; T *a, *b, *c;
#endif
public: public:
STDIndicesStream(const int, int) noexcept; STDIndicesStream(const int, int) noexcept;

View File

@ -3,10 +3,6 @@ register_flag_optional(CMAKE_CXX_COMPILER
"Any CXX compiler that is supported by CMake detection" "Any CXX compiler that is supported by CMake detection"
"c++") "c++")
register_flag_optional(USE_VECTOR
"Whether to use std::vector<T> for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use."
"OFF")
register_flag_optional(NVHPC_OFFLOAD register_flag_optional(NVHPC_OFFLOAD
"Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK. "Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK.
The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`) The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`)
@ -47,9 +43,6 @@ macro(setup)
register_append_cxx_flags(ANY ${NVHPC_FLAGS}) register_append_cxx_flags(ANY ${NVHPC_FLAGS})
register_append_link_flags(${NVHPC_FLAGS}) register_append_link_flags(${NVHPC_FLAGS})
endif () endif ()
if (USE_VECTOR)
register_definitions(USE_VECTOR)
endif ()
if (USE_TBB) if (USE_TBB)
register_link_library(TBB::tbb) register_link_library(TBB::tbb)
endif () endif ()

View File

@ -5,27 +5,16 @@
// source code // source code
#include "STDRangesStream.hpp" #include "STDRangesStream.hpp"
#include <ranges>
#ifndef ALIGNMENT #ifndef ALIGNMENT
#define ALIGNMENT (2*1024*1024) // 2MB #define ALIGNMENT (2*1024*1024) // 2MB
#endif #endif
#ifdef USE_VECTOR
#define BEGIN(x) (x).begin()
#define END(x) (x).end()
#else
#define BEGIN(x) (x)
#define END(x) ((x) + array_size)
#endif
template <class T> template <class T>
STDRangesStream<T>::STDRangesStream(const int ARRAY_SIZE, int device) STDRangesStream<T>::STDRangesStream(const int ARRAY_SIZE, int device)
noexcept : array_size{ARRAY_SIZE}, noexcept : array_size{ARRAY_SIZE},
#ifdef USE_VECTOR
a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE)
#else
a(alloc_raw<T>(ARRAY_SIZE)), b(alloc_raw<T>(ARRAY_SIZE)), c(alloc_raw<T>(ARRAY_SIZE)) a(alloc_raw<T>(ARRAY_SIZE)), b(alloc_raw<T>(ARRAY_SIZE)), c(alloc_raw<T>(ARRAY_SIZE))
#endif
{ {
std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl;
#ifdef USE_ONEDPL #ifdef USE_ONEDPL
@ -45,11 +34,9 @@ noexcept : array_size{ARRAY_SIZE},
template<class T> template<class T>
STDRangesStream<T>::~STDRangesStream() { STDRangesStream<T>::~STDRangesStream() {
#ifndef USE_VECTOR dealloc_raw(a);
dealloc_raw(a); dealloc_raw(b);
dealloc_raw(b); dealloc_raw(c);
dealloc_raw(c);
#endif
} }
template <class T> template <class T>
@ -70,9 +57,9 @@ template <class T>
void STDRangesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c) void STDRangesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
{ {
// Element-wise copy. // Element-wise copy.
std::copy(BEGIN(a), END(a), h_a.begin()); std::copy(a, a + array_size, h_a.begin());
std::copy(BEGIN(b), END(b), h_b.begin()); std::copy(b, b + array_size, h_b.begin());
std::copy(BEGIN(c), END(c), h_c.begin()); std::copy(c, c + array_size, h_c.begin());
} }
template <class T> template <class T>
@ -148,7 +135,7 @@ T STDRangesStream<T>::dot()
return return
std::transform_reduce( std::transform_reduce(
exe_policy, exe_policy,
BEGIN(a), END(a), BEGIN(b), 0.0); a, a + array_size, b, 0.0);
} }
void listDevices(void) void listDevices(void)
@ -168,6 +155,3 @@ std::string getDeviceDriver(const int)
template class STDRangesStream<float>; template class STDRangesStream<float>;
template class STDRangesStream<double>; template class STDRangesStream<double>;
#undef BEGIN
#undef END

View File

@ -21,11 +21,7 @@ class STDRangesStream : public Stream<T>
int array_size; int array_size;
// Device side pointers // Device side pointers
#ifdef USE_VECTOR
std::vector<T> a, b, c;
#else
T *a, *b, *c; T *a, *b, *c;
#endif
public: public:
STDRangesStream(const int, int) noexcept; STDRangesStream(const int, int) noexcept;

View File

@ -3,10 +3,6 @@ register_flag_optional(CMAKE_CXX_COMPILER
"Any CXX compiler that is supported by CMake detection and supports C++20 Ranges" "Any CXX compiler that is supported by CMake detection and supports C++20 Ranges"
"c++") "c++")
register_flag_optional(USE_VECTOR
"Whether to use std::vector<T> for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use."
"OFF")
register_flag_optional(USE_TBB register_flag_optional(USE_TBB
"No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details." "No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details."
"OFF") "OFF")
@ -32,10 +28,7 @@ macro(setup)
set(CMAKE_CXX_STANDARD_REQUIRED OFF) set(CMAKE_CXX_STANDARD_REQUIRED OFF)
unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default
# and append our own: # and append our own:
register_append_cxx_flags(ANY -std=c++2a) register_append_cxx_flags(ANY -std=c++20)
if (USE_VECTOR)
register_definitions(USE_VECTOR)
endif ()
if (USE_TBB) if (USE_TBB)
register_link_library(TBB::tbb) register_link_library(TBB::tbb)
endif () endif ()
@ -44,3 +37,10 @@ macro(setup)
register_link_library(oneDPL) register_link_library(oneDPL)
endif () endif ()
endmacro() endmacro()
macro(setup_target NAME)
if (USE_ONEDPL)
target_compile_features(${NAME} INTERFACE cxx_std_20)
target_compile_features(oneDPL INTERFACE cxx_std_20)
endif ()
endmacro()

View File

@ -46,11 +46,12 @@ macro(setup)
# see CUDA.cmake, we're only adding a few Thrust related libraries here # see CUDA.cmake, we're only adding a few Thrust related libraries here
if (POLICY CMP0104) if (POLICY CMP0104)
cmake_policy(SET CMP0104 OLD) cmake_policy(SET CMP0104 NEW)
endif () endif ()
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
# add -forward-unknown-to-host-compiler for compatibility reasons # add -forward-unknown-to-host-compiler for compatibility reasons
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--expt-extended-lambda -forward-unknown-to-host-compiler -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS}) set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--expt-extended-lambda " ${CUDA_EXTRA_FLAGS})
enable_language(CUDA) enable_language(CUDA)
# CMake defaults to -O2 for CUDA at Release, let's wipe that and use the global RELEASE_FLAG # CMake defaults to -O2 for CUDA at Release, let's wipe that and use the global RELEASE_FLAG
# appended later # appended later
@ -63,6 +64,7 @@ macro(setup)
# XXX NVHPC >= 22.3 has cub-config in `Linux_x86_64/22.3/cuda/11.6/lib64/cmake/cub/` # XXX NVHPC >= 22.3 has cub-config in `Linux_x86_64/22.3/cuda/11.6/lib64/cmake/cub/`
# same thing for thrust # same thing for thrust
if (SDK_DIR) if (SDK_DIR)
list(APPEND CMAKE_PREFIX_PATH ${SDK_DIR})
find_package(CUB REQUIRED CONFIG PATHS ${SDK_DIR}/cub) find_package(CUB REQUIRED CONFIG PATHS ${SDK_DIR}/cub)
find_package(Thrust REQUIRED CONFIG PATHS ${SDK_DIR}/thrust) find_package(Thrust REQUIRED CONFIG PATHS ${SDK_DIR}/thrust)
else () else ()
@ -73,9 +75,11 @@ macro(setup)
message(STATUS "Using Thrust backend: ${BACKEND}") message(STATUS "Using Thrust backend: ${BACKEND}")
# this creates the interface that we can link to # this creates the interface that we can link to
thrust_create_target(Thrust HOST CPP DEVICE ${BACKEND}) thrust_create_target(Thrust${BACKEND}
HOST CPP
DEVICE ${BACKEND})
register_link_library(Thrust) register_link_library(Thrust${BACKEND})
elseif (${THRUST_IMPL} STREQUAL "ROCM") elseif (${THRUST_IMPL} STREQUAL "ROCM")
if (SDK_DIR) if (SDK_DIR)
find_package(rocprim REQUIRED CONFIG PATHS ${SDK_DIR}/rocprim) find_package(rocprim REQUIRED CONFIG PATHS ${SDK_DIR}/rocprim)