From 72be9f698035b542bc1734dcbbd14543db55e244 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sun, 24 Sep 2023 21:11:35 +0100 Subject: [PATCH] Fix up CI, resolves #145, supersedes #154 Drop vector for std-* models --- .github/workflows/main.yaml | 84 +++++++++++++++-- CMakeLists.txt | 24 +++-- src/ci-prepare-bionic.sh | 92 +++++++++++-------- src/ci-test-compile.sh | 132 ++++++++++++++++----------- src/raja/model.cmake | 2 - src/std-data/STDDataStream.cpp | 49 ++++------ src/std-data/STDDataStream.h | 5 - src/std-data/model.cmake | 7 -- src/std-indices/STDIndicesStream.cpp | 57 +++--------- src/std-indices/STDIndicesStream.h | 5 - src/std-indices/model.cmake | 7 -- src/std-ranges/STDRangesStream.cpp | 32 ++----- src/std-ranges/STDRangesStream.hpp | 4 - src/std-ranges/model.cmake | 16 ++-- src/thrust/model.cmake | 12 ++- 15 files changed, 278 insertions(+), 250 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 08eed2d..8dc6905 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -12,7 +12,7 @@ on: jobs: test-rust: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 defaults: run: working-directory: ./src/rust/rust-stream @@ -28,7 +28,7 @@ jobs: run: ./target/release/rust-stream --arraysize 2048 test-java: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 defaults: run: working-directory: ./src/java/java-stream @@ -41,7 +41,7 @@ jobs: run: java -jar target/java-stream.jar --arraysize 2048 test-julia: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 defaults: run: working-directory: ./src/julia/JuliaStream.jl @@ -69,8 +69,24 @@ jobs: run: julia --project src/AMDGPUStream.jl --list + setup-cpp: + runs-on: ubuntu-22.04 + steps: + - name: Cache compiler + # if: ${{ !env.ACT }} + id: prepare-compilers + uses: actions/cache@v2 + with: + path: ./compilers + key: ${{ runner.os }}-${{ hashFiles('./src/ci-prepare-bionic.sh') }} + + - name: Prepare compilers + if: steps.prepare-compilers.outputs.cache-hit != 'true' + run: source ./src/ci-prepare-bionic.sh ./compilers SETUP false || true + test-cpp: - runs-on: ubuntu-18.04 + needs: setup-cpp + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 @@ -84,15 +100,15 @@ jobs: - name: Prepare compilers if: steps.prepare-compilers.outputs.cache-hit != 'true' - run: source ./src/ci-prepare-bionic.sh ./compilers SETUP true || true + run: source ./src/ci-prepare-bionic.sh ./compilers SETUP false || true - name: Setup test environment run: source ./src/ci-prepare-bionic.sh ./compilers VARS false || true # Enable tmate debugging of manually-triggered workflows if the input option was provided - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3 - if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled }} + # - name: Setup tmate session + # uses: mxschmitt/action-tmate@v3 + # if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled }} - name: Test compile gcc @ CMake 3.13 if: ${{ ! cancelled() }} @@ -167,4 +183,54 @@ jobs: run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_18_BIN }} - name: Test compile hipsycl @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }} \ No newline at end of file + run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }} + + - name: Test compile gcc @ CMake 3.20 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_20_BIN }} + - name: Test compile clang @ CMake 3.20 + if: ${{ ! 
cancelled() }} + run: ./src/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_20_BIN }} + - name: Test compile nvhpc @ CMake 3.20 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_20_BIN }} + - name: Test compile aocc @ CMake 3.20 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_20_BIN }} + - name: Test compile aomp @ CMake 3.20 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_20_BIN }} + - name: Test compile hip @ CMake 3.20 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_20_BIN }} + - name: Test compile dpcpp @ CMake 3.20 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_20_BIN }} + - name: Test compile hipsycl @ CMake 3.20 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_20_BIN }} + + - name: Test compile gcc @ CMake 3.24 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_24_BIN }} + - name: Test compile clang @ CMake 3.24 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_24_BIN }} + - name: Test compile nvhpc @ CMake 3.24 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_24_BIN }} + - name: Test compile aocc @ CMake 3.24 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_24_BIN }} + - name: Test compile aomp @ CMake 3.24 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_24_BIN }} + - name: Test compile hip @ CMake 3.24 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_24_BIN }} + - name: Test compile dpcpp @ CMake 3.24 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_24_BIN }} + - name: Test compile hipsycl @ CMake 3.24 + if: ${{ ! cancelled() }} + run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_24_BIN }} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index da112a4..879e463 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,9 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) +if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") + cmake_policy(SET CMP0135 NEW) +endif () + project(BabelStream VERSION 4.0 LANGUAGES CXX) # uncomment for debugging build issues: @@ -71,15 +75,19 @@ hint_flag(CXX_EXTRA_LINKER_FLAGS " # Honor user's CXX_EXTRA_LINK_FLAGS set(CXX_EXTRA_LINK_FLAGS ${CXX_EXTRA_FLAGS} ${CXX_EXTRA_LINK_FLAGS}) -option(USE_TBB "Enable oneTBB library for *supported* models. Enabling this on models that +option(USE_TBB "Enable the oneTBB library for *supported* models. Enabling this on models that don't explicitly link against TBB is a no-op, see description of your selected model on how this is used." OFF) -if (USE_TBB) +option(FETCH_TBB "Fetch (download) the oneTBB library for *supported* models. This uses CMake's + FetchContent feature. Specify version by setting FETCH_TBB_VERSION" OFF) +set(FETCH_TBB_VERSION "v2021.10.0" CACHE STRING "Specify version of oneTBB to use if FETCH_TBB is ON") + +if (FETCH_TBB) FetchContent_Declare( TBB GIT_REPOSITORY https://github.com/oneapi-src/oneTBB.git - GIT_TAG v2021.9.0 + GIT_TAG "${FETCH_TBB_VERSION}" ) # Don't fail builds on waring (TBB has -Wall while not being free of warnings from unused symbols...) 
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
@@ -92,15 +100,19 @@ if (USE_TBB)
     endif ()
 endif ()
 
-option(USE_ONEDPL "Enable oneDPL library for *supported* models. Enabling this on models that
+option(USE_ONEDPL "Enable the oneDPL library for *supported* models. Enabling this on models that
         don't explicitly link against DPL is a no-op, see description of your selected
         model on how this is used." OFF)
 
-if (USE_ONEDPL)
+option(FETCH_ONEDPL "Fetch (download) the oneDPL library for *supported* models. This uses CMake's
+        FetchContent feature. Specify version by setting FETCH_ONEDPL_VERSION" OFF)
+set(FETCH_ONEDPL_VERSION "oneDPL-2022.2.0-rc1" CACHE STRING "Specify version of oneDPL to use if FETCH_ONEDPL is ON")
+
+if (FETCH_ONEDPL)
     FetchContent_Declare(
             oneDPL
             GIT_REPOSITORY https://github.com/oneapi-src/oneDPL.git
-            GIT_TAG oneDPL-2022.2.0-rc1
+            GIT_TAG "${FETCH_ONEDPL_VERSION}"
     )
     string(TOLOWER ${USE_ONEDPL} ONEDPL_BACKEND)
     # XXX oneDPL looks for omp instead of openmp, which mismatches(!) with ONEDPL_PAR_BACKEND if using find_package
diff --git a/src/ci-prepare-bionic.sh b/src/ci-prepare-bionic.sh
index 78bbd33..6a1a959 100755
--- a/src/ci-prepare-bionic.sh
+++ b/src/ci-prepare-bionic.sh
@@ -83,6 +83,8 @@ get() {
     if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then
       echo "$name not found, downloading..."
       wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name"
+    else
+      echo "$name found, skipping download..."
     fi
   fi
 }
@@ -92,13 +94,15 @@ get_and_untar() {
   local pkg_url="$2"
   if [ "$SETUP" = true ]; then
     if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then
-      echo "$name not found, downloading..."
+      echo "$name not found, downloading ($pkg_url)..."
       wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name"
     fi
     echo "Preparing to extract $name ..."
     tar -xf "$name"
     echo "$name extracted, deleting archive ..."
     rm -f "$name" # delete for space
+  else
+    echo "Skipping setup for $name ($pkg_url)..."
fi } @@ -119,10 +123,10 @@ verify_dir_exists() { setup_aocc() { echo "Preparing AOCC" - local aocc_ver="2.3.0" + local aocc_ver="4.0.0" local tarball="aocc-$aocc_ver.tar.xz" # XXX it's actually XZ compressed, so it should be tar.xz - local AOCC_URL="http://developer.amd.com/wordpress/media/files/aocc-compiler-2.3.0.tar" + local AOCC_URL="https://download.amd.com/developer/eula/aocc-compiler/aocc-compiler-${aocc_ver}.tar" # local AOCC_URL="http://localhost:8000/aocc-compiler-2.3.0.tar" get_and_untar "$tarball" "$AOCC_URL" @@ -133,10 +137,10 @@ setup_aocc() { } setup_nvhpc() { - echo "Preparing Nvidia HPC SDK" - local nvhpc_ver="22.3" - local nvhpc_release="2022_223" - local cuda_ver="11.6" + echo "Preparing Nvidia HPC SDK" + local nvhpc_ver="23.1" # TODO FIXME > 23.1 has a bug with -A + local nvhpc_release="2023_231" + local cuda_ver="12.0" local tarball="nvhpc_$nvhpc_ver.tar.gz" @@ -145,7 +149,7 @@ setup_nvhpc() { local sdk_dir="$PWD/nvhpc_${nvhpc_release}_Linux_x86_64_cuda_$cuda_ver/install_components/Linux_x86_64/$nvhpc_ver" local bin_dir="$sdk_dir/compilers/bin" - "$bin_dir/makelocalrc" "$bin_dir" -x + "$bin_dir/makelocalrc" -d "$bin_dir" -x -gpp g++-12 -gcc gcc-12 -g77 gfortran-12 export_var NVHPC_SDK_DIR "$sdk_dir" export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/$cuda_ver" @@ -166,7 +170,8 @@ setup_nvhpc() { setup_aomp() { echo "Preparing AOMP" - local AOMP_URL="https://github.com/ROCm-Developer-Tools/aomp/releases/download/rel_11.12-0/aomp_Ubuntu1804_11.12-0_amd64.deb" + local aomp_ver="18.0-0" + local AOMP_URL="https://github.com/ROCm-Developer-Tools/aomp/releases/download/rel_${aomp_ver}/aomp_Ubuntu2204_${aomp_ver}_amd64.deb" # local AOMP_URL="http://0.0.0.0:8000/aomp_Ubuntu1804_11.12-0_amd64.deb" get_and_install_deb "aomp" "aomp" "$AOMP_URL" @@ -189,9 +194,10 @@ setup_oclcpu() { setup_kokkos() { echo "Preparing Kokkos" - local kokkos_ver="3.3.01" + local kokkos_ver="4.1.00" local tarball="kokkos-$kokkos_ver.tar.gz" + local url="https://github.com/kokkos/kokkos/archive/$kokkos_ver.tar.gz" # local url="http://localhost:8000/$kokkos_ver.tar.gz" @@ -203,10 +209,10 @@ setup_kokkos() { setup_raja() { echo "Preparing RAJA" - local raja_ver="0.13.0" + local raja_ver="2023.06.1" local tarball="raja-$raja_ver.tar.gz" - local url="https://github.com/LLNL/RAJA/releases/download/v0.13.0/RAJA-v$raja_ver.tar.gz" + local url="https://github.com/LLNL/RAJA/releases/download/v$raja_ver/RAJA-v$raja_ver.tar.gz" # local url="http://localhost:8000/RAJA-v$raja_ver.tar.gz" get_and_untar "$tarball" "$url" @@ -217,7 +223,7 @@ setup_raja() { setup_tbb() { echo "Preparing TBB" - local tbb_ver="2021.2.0" + local tbb_ver="2021.9.0" local tarball="oneapi-tbb-$tbb_ver-lin.tgz" local url="https://github.com/oneapi-src/oneTBB/releases/download/v$tbb_ver/oneapi-tbb-$tbb_ver-lin.tgz" @@ -231,9 +237,9 @@ setup_tbb() { setup_clang_gcc() { - sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10 clang libomp-dev + sudo apt-get install -y -qq gcc-12-offload-nvptx gcc-12-offload-amdgcn libtbb2 libtbb-dev g++-12 clang libomp-dev libc6 - export_var GCC_CXX "$(which g++-10)" + export_var GCC_CXX "$(which g++-12)" verify_bin_exists "$GCC_CXX" "$GCC_CXX" --version @@ -254,7 +260,7 @@ setup_clang_gcc() { } setup_rocm() { - sudo apt-get install -y -qq rocm-dev rocthrust-dev + sudo apt-get install -y rocm-dev rocthrust-dev export_var ROCM_PATH "/opt/rocm" export_var PATH "$ROCM_PATH/bin:$PATH" # ROCm needs this for many of their libraries' CMake build to work export_var HIP_CXX 
"$ROCM_PATH/bin/hipcc" @@ -265,7 +271,7 @@ setup_rocm() { setup_dpcpp() { - local nightly="20210106" + local nightly="20230615" local tarball="dpcpp-$nightly.tar.gz" local url="https://github.com/intel/llvm/releases/download/sycl-nightly/$nightly/dpcpp-compiler.tar.gz" @@ -282,22 +288,22 @@ setup_dpcpp() { setup_hipsycl() { sudo apt-get install -y -qq libboost-fiber-dev libboost-context-dev - local hipsycl_ver="0.9.0" + local hipsycl_ver="0.9.1" local tarball="v$hipsycl_ver.tar.gz" local install_dir="$PWD/hipsycl_dist_$hipsycl_ver" - local url="https://github.com/illuhad/hipSYCL/archive/v$hipsycl_ver.tar.gz" - # local url="http://localhost:8000/hipSYCL-$hipsycl_ver.tar.gz" + local url="https://github.com/AdaptiveCpp/AdaptiveCpp/archive/v$hipsycl_ver.tar.gz" + # local url="http://localhost:8000/AdaptiveCpp-$hipsycl_ver.tar.gz" get_and_untar "$tarball" "$url" if [ "$SETUP" = true ]; then - local src="$PWD/hipSYCL-$hipsycl_ver" + local src="$PWD/AdaptiveCpp-$hipsycl_ver" rm -rf "$src/build" rm -rf "$install_dir" cmake "-B$src/build" "-H$src" \ - -DCMAKE_C_COMPILER="$(which gcc-10)" \ - -DCMAKE_CXX_COMPILER="$(which g++-10)" \ + -DCMAKE_C_COMPILER="$(which gcc-12)" \ + -DCMAKE_CXX_COMPILER="$(which g++-12)" \ -DCMAKE_INSTALL_PREFIX="$install_dir" \ -DWITH_ROCM_BACKEND=OFF \ -DWITH_CUDA_BACKEND=OFF \ @@ -312,25 +318,20 @@ setup_hipsycl() { check_size } -setup_computecpp() { - echo "TODO ComputeCpp requires registration+login to download" -} - if [ "${GITHUB_ACTIONS:-false}" = true ]; then echo "Running in GitHub Actions, defaulting to special export" TERM=xterm export TERM=xterm - # drop the lock in case we got one from a failed run - rm /var/lib/dpkg/lock-frontend || true - rm /var/cache/apt/archives/lock || true - - wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add - - echo "deb http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list - echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.5 ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list + rm -rf /var/lib/dpkg/lock-frontend || true + rm -rf /var/cache/apt/archives/lock || true + mkdir --parents --mode=0755 /etc/apt/keyrings + wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null + echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/5.7 jammy main' | sudo tee /etc/apt/sources.list.d/rocm.list + echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 sudo apt-get update -qq - sudo apt-get install -y -qq cmake + sudo apt-get install cmake gcc g++ libelf-dev libdrm-amdgpu1 libnuma-dev if [ "$SETUP" = true ]; then echo "Deleting extra packages for space in 2 seconds..." 
@@ -340,6 +341,7 @@ if [ "${GITHUB_ACTIONS:-false}" = true ]; then sudo apt-get autoremove -y check_size fi + sudo apt-get upgrade -qq else echo "Running locally, defaulting to standard export" fi @@ -368,6 +370,18 @@ setup_cmake() { verify_bin_exists "$CMAKE_3_18_BIN" "$CMAKE_3_18_BIN" --version + get "cmake-3.20.sh" "$cmake_release/v3.20.4/cmake-3.20.4-linux-x86_64.sh" + chmod +x "./cmake-3.20.sh" && "./cmake-3.20.sh" --skip-license --include-subdir + export_var CMAKE_3_20_BIN "$PWD/cmake-3.20.4-linux-x86_64/bin/cmake" + verify_bin_exists "$CMAKE_3_20_BIN" + "$CMAKE_3_20_BIN" --version + + get "cmake-3.24.sh" "$cmake_release/v3.24.4/cmake-3.24.4-linux-x86_64.sh" + chmod +x "./cmake-3.24.sh" && "./cmake-3.24.sh" --skip-license --include-subdir + export_var CMAKE_3_24_BIN "$PWD/cmake-3.24.4-linux-x86_64/bin/cmake" + verify_bin_exists "$CMAKE_3_24_BIN" + "$CMAKE_3_24_BIN" --version + check_size } @@ -385,6 +399,10 @@ if [ "$PARALLEL" = true ]; then setup_tbb & wait else + # these need apt + setup_clang_gcc + setup_rocm + setup_hipsycl setup_cmake setup_aocc setup_oclcpu @@ -394,10 +412,6 @@ else setup_kokkos setup_raja setup_tbb - # these need apt - setup_clang_gcc - setup_rocm - setup_hipsycl fi echo "Done!" diff --git a/src/ci-test-compile.sh b/src/ci-test-compile.sh index d3fc5b7..610c3f0 100755 --- a/src/ci-test-compile.sh +++ b/src/ci-test-compile.sh @@ -120,10 +120,21 @@ run_build() { # CLANG_OMP_OFFLOAD_NVIDIA=false ### +NV_ARCH_CC="70" AMD_ARCH="gfx_903" -NV_ARCH="sm_70" +NV_ARCH="sm_${NV_ARCH_CC}" NV_ARCH_CCXY="cuda${NVHPC_CUDA_VER:?},cc80" +check_cmake_ver(){ + local current=$("$CMAKE_BIN" --version | head -n 1 | cut -d ' ' -f3) + local required=$1 + if [ "$(printf '%s\n' "$required" "$current" | sort -V | head -n1)" = "$required" ]; then + return 0 + else + return 1 + fi +} + build_gcc() { local name="gcc_build" local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}" @@ -138,14 +149,12 @@ build_gcc() { for use_onedpl in OFF OPENMP TBB; do case "$use_onedpl" in OFF) dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" ;; - *) dpl_conditional_flags="-DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;; + *) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;; esac - for use_vector in OFF ON; do - # some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here - run_build $name "${GCC_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector" - run_build $name "${GCC_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector" - run_build $name "${GCC_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector" - done + # some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here + run_build $name "${GCC_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" + run_build $name "${GCC_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" + run_build $name "${GCC_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" done run_build $name "${GCC_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB" @@ -153,40 +162,45 @@ build_gcc() { run_build $name "${GCC_CXX:?}" tbb "$cxx -DUSE_VECTOR=ON" # build with vectors if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then - run_build "amd_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa" + run_build 
"amd_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa;-fno-stack-protector;-fcf-protection=none" run_build "amd_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=AMD:$AMD_ARCH" fi if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then - run_build "nvidia_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none" + run_build "nvidia_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none;-fno-stack-protector;-fcf-protection=none" run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" fi run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" - # run_build $name "${CC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON" - run_build "cuda_$name" "${GCC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + if check_cmake_ver "3.16.0"; then + # run_build $name "${CC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON" + run_build "cuda_$name" "${GCC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + else + echo "Skipping Kokkos models due to CMake version requirement" + fi run_build $name "${GCC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" - run_build $name "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" + if check_cmake_ver "3.20.0"; then + run_build $name "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} -DENABLE_OPENMP=ON" + else + echo "Skipping RAJA models due to CMake version requirement" + fi -# FIXME fails due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100102 -# FIXME we also got https://github.com/NVIDIA/nccl/issues/494 + if check_cmake_ver "3.20.0"; then + run_build "cuda_$name" "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \ + -DENABLE_CUDA=ON \ + -DTARGET=NVIDIA \ + -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ + -DCUDA_ARCH=$NV_ARCH" + else + echo "Skipping RAJA models due to CMake version requirement" + fi -# run_build "cuda_$name" "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \ -# -DENABLE_CUDA=ON \ -# -DTARGET=NVIDIA \ -# -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ -# -DCUDA_ARCH=$NV_ARCH" - - - # CMake >= 3.15 only due to Nvidia's Thrust CMake requirements - local current=$("$CMAKE_BIN" --version | head -n 1 | cut -d ' ' -f3) - local required="3.15.0" - if [ "$(printf '%s\n' "$required" "$current" | sort -V | head -n1)" = "$required" ]; then - run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" - run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=OMP" - run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=CPP" + if check_cmake_ver "3.18.0"; then # CMake >= 3.15 only due to Nvidia's Thrust CMake requirements + run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH_CC -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=CUDA" +# run_build $name "${GCC_CXX:?}" thrust 
"$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=OMP" # FIXME + run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH_CC -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=CPP" # FIXME CUDA Thrust + TBB throws the following error: # /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512fintrin.h(9146): error: identifier "__builtin_ia32_rndscaless_round" is undefined @@ -198,7 +212,7 @@ build_gcc() { # run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/lib64/cmake -DTHRUST_IMPL=CUDA -DBACKEND=TBB" else - echo "CMake version ${current} < ${required}, skipping Thrust models" + echo "Skipping Thrust models due to CMake version requirement" fi } @@ -216,30 +230,39 @@ build_clang() { run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" fi - run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" + if check_cmake_ver "3.20.0"; then + run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} -DENABLE_OPENMP=ON" + else + echo "Skipping RAJA models due to CMake version requirement" + fi run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" - run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + if check_cmake_ver "3.16.0"; then + run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + else + echo "Skipping Kokkos models due to CMake version requirement" + fi run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" for use_onedpl in OFF OPENMP TBB; do - for use_vector in OFF ON; do - case "$use_onedpl" in - OFF) dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" ;; - *) dpl_conditional_flags="-DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;; - esac - run_build $name "${CLANG_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector " - run_build $name "${CLANG_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector" - # run_build $name "${CLANG_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DUSE_VECTOR=$use_vector" # not yet supported - done + case "$use_onedpl" in + OFF) dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" ;; + *) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;; + esac + run_build $name "${CLANG_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" + run_build $name "${CLANG_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" + # run_build $name "${CLANG_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" # not yet supported done run_build $name "${CLANG_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB" run_build $name "${CLANG_CXX:?}" tbb "$cxx" # build TBB again with the system TBB run_build $name "${CLANG_CXX:?}" tbb "$cxx -DUSE_VECTOR=ON" # build with vectors - - run_build $name "${CLANG_CXX:?}" raja "$cxx 
-DRAJA_IN_TREE=${RAJA_SRC:?}" + if check_cmake_ver "3.20.0"; then + run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} -DENABLE_OPENMP=ON" + else + echo "Skipping RAJA models due to CMake version requirement" + fi # no clang /w RAJA+cuda because it needs nvcc which needs gcc } @@ -249,10 +272,6 @@ build_nvhpc() { run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" - # std again but with vectors - run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DUSE_VECTOR=ON" - run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DUSE_VECTOR=ON" - run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen" } @@ -291,15 +310,18 @@ build_icpc() { local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}" run_build $name "${ICPC_CXX:?}" omp "$cxx" run_build $name "${ICPC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" - run_build $name "${ICPC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" - run_build $name "${ICPC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" -} + if check_cmake_ver "3.20.0"; then + run_build $name "${ICPC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} -DENABLE_OPENMP=ON" + else + echo "Skipping RAJA models due to CMake version requirement" + fi + + if check_cmake_ver "3.16.0"; then + run_build $name "${ICPC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + else + echo "Skipping Kokkos models due to CMake version requirement" + fi -build_computecpp() { - run_build computecpp_build "compute++" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ - -DSYCL_COMPILER=COMPUTECPP \ - -DSYCL_COMPILER_DIR=${COMPUTECPP_DIR:?} \ - -DOpenCL_LIBRARY=${OCL_LIB:?}" } build_dpcpp() { diff --git a/src/raja/model.cmake b/src/raja/model.cmake index eb4788c..bf30631 100644 --- a/src/raja/model.cmake +++ b/src/raja/model.cmake @@ -8,8 +8,6 @@ register_flag_optional(RAJA_IN_TREE Make sure to use the release version of RAJA or clone RAJA recursively with submodules. Remember to append RAJA specific flags as well, for example: -DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ... - For RAJA >= v2022.03.0, remember to use the RAJA prefixed CMake options: - -DRAJA_IN_TREE=... -DRAJA_ENABLE_OPENMP=ON -DRAJA_ENABLE_CUDA=ON ... 
See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options " "") diff --git a/src/std-data/STDDataStream.cpp b/src/std-data/STDDataStream.cpp index 3d7ef18..e426835 100644 --- a/src/std-data/STDDataStream.cpp +++ b/src/std-data/STDDataStream.cpp @@ -6,22 +6,10 @@ #include "STDDataStream.h" -#ifdef USE_VECTOR -#define BEGIN(x) (x).begin() -#define END(x) (x).end() -#else -#define BEGIN(x) (x) -#define END(x) ((x) + array_size) -#endif - template STDDataStream::STDDataStream(const int ARRAY_SIZE, int device) noexcept : array_size{ARRAY_SIZE}, -#ifdef USE_VECTOR - a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE) -#else a(alloc_raw(ARRAY_SIZE)), b(alloc_raw(ARRAY_SIZE)), c(alloc_raw(ARRAY_SIZE)) -#endif { std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; #ifdef USE_ONEDPL @@ -41,55 +29,53 @@ STDDataStream::STDDataStream(const int ARRAY_SIZE, int device) template STDDataStream::~STDDataStream() { -#ifndef USE_VECTOR - dealloc_raw(a); - dealloc_raw(b); - dealloc_raw(c); -#endif + dealloc_raw(a); + dealloc_raw(b); + dealloc_raw(c); } template void STDDataStream::init_arrays(T initA, T initB, T initC) { - std::fill(exe_policy, BEGIN(a), END(a), initA); - std::fill(exe_policy, BEGIN(b), END(b), initB); - std::fill(exe_policy, BEGIN(c), END(c), initC); + std::fill(exe_policy, a, a + array_size, initA); + std::fill(exe_policy, b, b + array_size, initB); + std::fill(exe_policy, c, c + array_size, initC); } template void STDDataStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) { - std::copy(BEGIN(a), END(a), h_a.begin()); - std::copy(BEGIN(b), END(b), h_b.begin()); - std::copy(BEGIN(c), END(c), h_c.begin()); + std::copy(a, a + array_size, h_a.begin()); + std::copy(b, b + array_size, h_b.begin()); + std::copy(c, c + array_size, h_c.begin()); } template void STDDataStream::copy() { // c[i] = a[i] - std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c)); + std::copy(exe_policy, a, a + array_size, c); } template void STDDataStream::mul() { // b[i] = scalar * c[i]; - std::transform(exe_policy, BEGIN(c), END(c), BEGIN(b), [scalar = startScalar](T ci){ return scalar*ci; }); + std::transform(exe_policy, c, c + array_size, b, [scalar = startScalar](T ci){ return scalar*ci; }); } template void STDDataStream::add() { // c[i] = a[i] + b[i]; - std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(c), std::plus()); + std::transform(exe_policy, a, a + array_size, b, c, std::plus()); } template void STDDataStream::triad() { // a[i] = b[i] + scalar * c[i]; - std::transform(exe_policy, BEGIN(b), END(b), BEGIN(c), BEGIN(a), [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; }); + std::transform(exe_policy, b, b + array_size, c, a, [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; }); } template @@ -99,8 +85,8 @@ void STDDataStream::nstream() // Need to do in two stages with C++11 STL. 
// 1: a[i] += b[i] // 2: a[i] += scalar * c[i]; - std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(a), [](T ai, T bi){ return ai + bi; }); - std::transform(exe_policy, BEGIN(a), END(a), BEGIN(c), BEGIN(a), [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; }); + std::transform(exe_policy, a, a + array_size, b, a, [](T ai, T bi){ return ai + bi; }); + std::transform(exe_policy, a, a + array_size, c, a, [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; }); } @@ -108,7 +94,7 @@ template T STDDataStream::dot() { // sum = 0; sum += a[i]*b[i]; return sum; - return std::transform_reduce(exe_policy, BEGIN(a), END(a), BEGIN(b), 0.0); + return std::transform_reduce(exe_policy, a, a + array_size, b, 0.0); } void listDevices(void) @@ -127,6 +113,3 @@ std::string getDeviceDriver(const int) } template class STDDataStream; template class STDDataStream; - -#undef BEGIN -#undef END diff --git a/src/std-data/STDDataStream.h b/src/std-data/STDDataStream.h index 911a621..65e1ace 100644 --- a/src/std-data/STDDataStream.h +++ b/src/std-data/STDDataStream.h @@ -22,12 +22,7 @@ class STDDataStream : public Stream int array_size; // Device side pointers -#ifdef USE_VECTOR - std::vector a, b, c; -#else T *a, *b, *c; -#endif - public: STDDataStream(const int, int) noexcept; diff --git a/src/std-data/model.cmake b/src/std-data/model.cmake index e1697b6..e9e7099 100644 --- a/src/std-data/model.cmake +++ b/src/std-data/model.cmake @@ -3,10 +3,6 @@ register_flag_optional(CMAKE_CXX_COMPILER "Any CXX compiler that is supported by CMake detection" "c++") -register_flag_optional(USE_VECTOR - "Whether to use std::vector for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use." - "OFF") - register_flag_optional(NVHPC_OFFLOAD "Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK. The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`) @@ -47,9 +43,6 @@ macro(setup) register_append_cxx_flags(ANY ${NVHPC_FLAGS}) register_append_link_flags(${NVHPC_FLAGS}) endif () - if (USE_VECTOR) - register_definitions(USE_VECTOR) - endif () if (USE_TBB) register_link_library(TBB::tbb) endif () diff --git a/src/std-indices/STDIndicesStream.cpp b/src/std-indices/STDIndicesStream.cpp index 6e13597..1cf1ccc 100644 --- a/src/std-indices/STDIndicesStream.cpp +++ b/src/std-indices/STDIndicesStream.cpp @@ -10,32 +10,10 @@ #define ALIGNMENT (2*1024*1024) // 2MB #endif -#ifdef USE_VECTOR -#define BEGIN(x) (x).begin() -#define END(x) (x).end() -#else -#define BEGIN(x) (x) -#define END(x) ((x) + array_size) -#endif - -#ifdef USE_VECTOR -#if (defined(__NVCOMPILER) || defined(__NVCOMPILER_LLVM__)) -#error "std::vector *is* supported in NVHPC if we capture `this`, however, oneDPL (via SYCL2020) only works correctly with explicit *value* captures." 
-#endif - -#if defined(USE_ONEDPL) -#error "std::vector is unspported: oneDPL (via SYCL2020) only works correctly with explicit *value* captures" -#endif -#endif - template STDIndicesStream::STDIndicesStream(const int ARRAY_SIZE, int device) noexcept : array_size{ARRAY_SIZE}, range(0, array_size), -#ifdef USE_VECTOR - a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE) -#else a(alloc_raw(ARRAY_SIZE)), b(alloc_raw(ARRAY_SIZE)), c(alloc_raw(ARRAY_SIZE)) -#endif { std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; #ifdef USE_ONEDPL @@ -55,41 +33,39 @@ noexcept : array_size{ARRAY_SIZE}, range(0, array_size), template STDIndicesStream::~STDIndicesStream() { -#ifndef USE_VECTOR - dealloc_raw(a); - dealloc_raw(b); - dealloc_raw(c); -#endif + dealloc_raw(a); + dealloc_raw(b); + dealloc_raw(c); } template void STDIndicesStream::init_arrays(T initA, T initB, T initC) { - std::fill(exe_policy, BEGIN(a), END(a), initA); - std::fill(exe_policy, BEGIN(b), END(b), initB); - std::fill(exe_policy, BEGIN(c), END(c), initC); + std::fill(exe_policy, a, a + array_size, initA); + std::fill(exe_policy, b, b + array_size, initB); + std::fill(exe_policy, c, c + array_size, initC); } template void STDIndicesStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) { - std::copy(BEGIN(a), END(a), h_a.begin()); - std::copy(BEGIN(b), END(b), h_b.begin()); - std::copy(BEGIN(c), END(c), h_c.begin()); + std::copy(a, a + array_size, h_a.begin()); + std::copy(b, b + array_size, h_b.begin()); + std::copy(c, c + array_size, h_c.begin()); } template void STDIndicesStream::copy() { // c[i] = a[i] - std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c)); + std::copy(exe_policy, a, a + array_size, c); } template void STDIndicesStream::mul() { // b[i] = scalar * c[i]; - std::transform(exe_policy, range.begin(), range.end(), BEGIN(b), [c = this->c, scalar = startScalar](int i) { + std::transform(exe_policy, range.begin(), range.end(), b, [c = this->c, scalar = startScalar](int i) { return scalar * c[i]; }); } @@ -98,7 +74,7 @@ template void STDIndicesStream::add() { // c[i] = a[i] + b[i]; - std::transform(exe_policy, range.begin(), range.end(), BEGIN(c), [a = this->a, b = this->b](int i) { + std::transform(exe_policy, range.begin(), range.end(), c, [a = this->a, b = this->b](int i) { return a[i] + b[i]; }); } @@ -107,7 +83,7 @@ template void STDIndicesStream::triad() { // a[i] = b[i] + scalar * c[i]; - std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [b = this->b, c = this->c, scalar = startScalar](int i) { + std::transform(exe_policy, range.begin(), range.end(), a, [b = this->b, c = this->c, scalar = startScalar](int i) { return b[i] + scalar * c[i]; }); } @@ -119,7 +95,7 @@ void STDIndicesStream::nstream() // Need to do in two stages with C++11 STL. 
// 1: a[i] += b[i] // 2: a[i] += scalar * c[i]; - std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [a = this->a, b = this->b, c = this->c, scalar = startScalar](int i) { + std::transform(exe_policy, range.begin(), range.end(), a, [a = this->a, b = this->b, c = this->c, scalar = startScalar](int i) { return a[i] + b[i] + scalar * c[i]; }); } @@ -129,7 +105,7 @@ template T STDIndicesStream::dot() { // sum = 0; sum += a[i]*b[i]; return sum; - return std::transform_reduce(exe_policy, BEGIN(a), END(a), BEGIN(b), 0.0); + return std::transform_reduce(exe_policy, a, a + array_size, b, 0.0); } void listDevices(void) @@ -148,6 +124,3 @@ std::string getDeviceDriver(const int) } template class STDIndicesStream; template class STDIndicesStream; - -#undef BEGIN -#undef END diff --git a/src/std-indices/STDIndicesStream.h b/src/std-indices/STDIndicesStream.h index 0916ef2..ffab910 100644 --- a/src/std-indices/STDIndicesStream.h +++ b/src/std-indices/STDIndicesStream.h @@ -77,12 +77,7 @@ class STDIndicesStream : public Stream ranged range; // Device side pointers -#ifdef USE_VECTOR - std::vector a, b, c; -#else T *a, *b, *c; -#endif - public: STDIndicesStream(const int, int) noexcept; diff --git a/src/std-indices/model.cmake b/src/std-indices/model.cmake index c2fef28..60ef575 100644 --- a/src/std-indices/model.cmake +++ b/src/std-indices/model.cmake @@ -3,10 +3,6 @@ register_flag_optional(CMAKE_CXX_COMPILER "Any CXX compiler that is supported by CMake detection" "c++") -register_flag_optional(USE_VECTOR - "Whether to use std::vector for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use." - "OFF") - register_flag_optional(NVHPC_OFFLOAD "Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK. The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`) @@ -47,9 +43,6 @@ macro(setup) register_append_cxx_flags(ANY ${NVHPC_FLAGS}) register_append_link_flags(${NVHPC_FLAGS}) endif () - if (USE_VECTOR) - register_definitions(USE_VECTOR) - endif () if (USE_TBB) register_link_library(TBB::tbb) endif () diff --git a/src/std-ranges/STDRangesStream.cpp b/src/std-ranges/STDRangesStream.cpp index e05a7d1..d497691 100644 --- a/src/std-ranges/STDRangesStream.cpp +++ b/src/std-ranges/STDRangesStream.cpp @@ -5,27 +5,16 @@ // source code #include "STDRangesStream.hpp" +#include #ifndef ALIGNMENT #define ALIGNMENT (2*1024*1024) // 2MB #endif -#ifdef USE_VECTOR -#define BEGIN(x) (x).begin() -#define END(x) (x).end() -#else -#define BEGIN(x) (x) -#define END(x) ((x) + array_size) -#endif - template STDRangesStream::STDRangesStream(const int ARRAY_SIZE, int device) noexcept : array_size{ARRAY_SIZE}, -#ifdef USE_VECTOR - a(ARRAY_SIZE), b(ARRAY_SIZE), c(ARRAY_SIZE) -#else a(alloc_raw(ARRAY_SIZE)), b(alloc_raw(ARRAY_SIZE)), c(alloc_raw(ARRAY_SIZE)) -#endif { std::cout << "Backing storage typeid: " << typeid(a).name() << std::endl; #ifdef USE_ONEDPL @@ -45,11 +34,9 @@ noexcept : array_size{ARRAY_SIZE}, template STDRangesStream::~STDRangesStream() { -#ifndef USE_VECTOR - dealloc_raw(a); - dealloc_raw(b); - dealloc_raw(c); -#endif + dealloc_raw(a); + dealloc_raw(b); + dealloc_raw(c); } template @@ -70,9 +57,9 @@ template void STDRangesStream::read_arrays(std::vector& h_a, std::vector& h_b, std::vector& h_c) { // Element-wise copy. 
- std::copy(BEGIN(a), END(a), h_a.begin()); - std::copy(BEGIN(b), END(b), h_b.begin()); - std::copy(BEGIN(c), END(c), h_c.begin()); + std::copy(a, a + array_size, h_a.begin()); + std::copy(b, b + array_size, h_b.begin()); + std::copy(c, c + array_size, h_c.begin()); } template @@ -148,7 +135,7 @@ T STDRangesStream::dot() return std::transform_reduce( exe_policy, - BEGIN(a), END(a), BEGIN(b), 0.0); + a, a + array_size, b, 0.0); } void listDevices(void) @@ -168,6 +155,3 @@ std::string getDeviceDriver(const int) template class STDRangesStream; template class STDRangesStream; - -#undef BEGIN -#undef END diff --git a/src/std-ranges/STDRangesStream.hpp b/src/std-ranges/STDRangesStream.hpp index 9d36d46..6e7c29c 100644 --- a/src/std-ranges/STDRangesStream.hpp +++ b/src/std-ranges/STDRangesStream.hpp @@ -21,11 +21,7 @@ class STDRangesStream : public Stream int array_size; // Device side pointers -#ifdef USE_VECTOR - std::vector a, b, c; -#else T *a, *b, *c; -#endif public: STDRangesStream(const int, int) noexcept; diff --git a/src/std-ranges/model.cmake b/src/std-ranges/model.cmake index 35554c7..8f73501 100644 --- a/src/std-ranges/model.cmake +++ b/src/std-ranges/model.cmake @@ -3,10 +3,6 @@ register_flag_optional(CMAKE_CXX_COMPILER "Any CXX compiler that is supported by CMake detection and supports C++20 Ranges" "c++") -register_flag_optional(USE_VECTOR - "Whether to use std::vector for storage or use aligned_alloc. C++ vectors are *zero* initialised where as aligned_alloc is uninitialised before first use." - "OFF") - register_flag_optional(USE_TBB "No-op if ONE_TBB_DIR is set. Link against an in-tree oneTBB via FetchContent_Declare, see top level CMakeLists.txt for details." "OFF") @@ -32,10 +28,7 @@ macro(setup) set(CMAKE_CXX_STANDARD_REQUIRED OFF) unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default # and append our own: - register_append_cxx_flags(ANY -std=c++2a) - if (USE_VECTOR) - register_definitions(USE_VECTOR) - endif () + register_append_cxx_flags(ANY -std=c++20) if (USE_TBB) register_link_library(TBB::tbb) endif () @@ -44,3 +37,10 @@ macro(setup) register_link_library(oneDPL) endif () endmacro() + +macro(setup_target NAME) + if (USE_ONEDPL) + target_compile_features(${NAME} INTERFACE cxx_std_20) + target_compile_features(oneDPL INTERFACE cxx_std_20) + endif () +endmacro() diff --git a/src/thrust/model.cmake b/src/thrust/model.cmake index 91821ef..6b82ef5 100644 --- a/src/thrust/model.cmake +++ b/src/thrust/model.cmake @@ -46,11 +46,12 @@ macro(setup) # see CUDA.cmake, we're only adding a few Thrust related libraries here if (POLICY CMP0104) - cmake_policy(SET CMP0104 OLD) + cmake_policy(SET CMP0104 NEW) endif () + set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH}) # add -forward-unknown-to-host-compiler for compatibility reasons - set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--expt-extended-lambda -forward-unknown-to-host-compiler -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS}) + set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--expt-extended-lambda " ${CUDA_EXTRA_FLAGS}) enable_language(CUDA) # CMake defaults to -O2 for CUDA at Release, let's wipe that and use the global RELEASE_FLAG # appended later @@ -63,6 +64,7 @@ macro(setup) # XXX NVHPC >= 22.3 has cub-config in `Linux_x86_64/22.3/cuda/11.6/lib64/cmake/cub/` # same thing for thrust if (SDK_DIR) + list(APPEND CMAKE_PREFIX_PATH ${SDK_DIR}) find_package(CUB REQUIRED CONFIG PATHS ${SDK_DIR}/cub) find_package(Thrust REQUIRED CONFIG PATHS ${SDK_DIR}/thrust) else () @@ -73,9 +75,11 @@ macro(setup) message(STATUS "Using Thrust 
backend: ${BACKEND}") # this creates the interface that we can link to - thrust_create_target(Thrust HOST CPP DEVICE ${BACKEND}) + thrust_create_target(Thrust${BACKEND} + HOST CPP + DEVICE ${BACKEND}) - register_link_library(Thrust) + register_link_library(Thrust${BACKEND}) elseif (${THRUST_IMPL} STREQUAL "ROCM") if (SDK_DIR) find_package(rocprim REQUIRED CONFIG PATHS ${SDK_DIR}/rocprim)