Merge branch 'develop' into option_for_vec

2023-09-05 02:44:43 +01:00 · 2023-09-05 02:44:43 +01:00 · b25fd755a6
commit b25fd755a6
parent 28dcf6f962 6d11c72382
4 changed files with 39 additions and 3 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.
 ## Unreleased
 ### Added
 - Ability to build Kokkos and RAJA versions against existing packages.
 - Thrust managed memory.
 ### Changed
 - RAJA CUDA CMake build issues resolved.
--- a/src/raja/model.cmake
+++ b/src/raja/model.cmake
@ -8,6 +8,8 @@ register_flag_optional(RAJA_IN_TREE
         Make sure to use the release version of RAJA or clone RAJA recursively with submodules.
         Remember to append RAJA specific flags as well, for example:
             -DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ...
         For RAJA >= v2022.03.0, remember to use the RAJA prefixed CMake options:
             -DRAJA_IN_TREE=... -DRAJA_ENABLE_OPENMP=ON -DRAJA_ENABLE_CUDA=ON ...
         See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options
 " "")
@ -20,7 +22,7 @@ register_flag_optional(TARGET
        CPU)
 register_flag_optional(CUDA_TOOLKIT_ROOT_DIR
-        "[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the ENABLE_CUDA flag is specified for RAJA" "")
+        "[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the RAJA_ENABLE_CUDA or ENABLE_CUDA flag is specified for RAJA" "")
 # XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
 register_flag_optional(CUDA_ARCH
@ -58,7 +60,20 @@ macro(setup)
        set(ENABLE_BENCHMARKS OFF CACHE BOOL "")
        set(ENABLE_CUDA ${ENABLE_CUDA} CACHE BOOL "" FORCE)
-        if (ENABLE_CUDA)
+        # RAJA >= v2022.03.0 switched to prefixed variables, we keep the legacy ones for backwards compatibility
        set(RAJA_ENABLE_TESTS OFF CACHE BOOL "")
        set(RAJA_ENABLE_EXAMPLES OFF CACHE BOOL "")
        set(RAJA_ENABLE_REPRODUCERS OFF CACHE BOOL "")
        set(RAJA_ENABLE_EXERCISES OFF CACHE BOOL "")
        set(RAJA_ENABLE_DOCUMENTATION OFF CACHE BOOL "")
        set(RAJA_ENABLE_BENCHMARKS OFF CACHE BOOL "")
        set(RAJA_ENABLE_CUDA ${RAJA_ENABLE_CUDA} CACHE BOOL "" FORCE)
        if (ENABLE_CUDA OR RAJA_ENABLE_CUDA)
            # RAJA still needs ENABLE_CUDA for internal use, so if either is on, assert both.
            set(RAJA_ENABLE_CUDA ON)
            set(ENABLE_CUDA ON)
            # XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
            if(POLICY CMP0104)
@ -70,6 +85,10 @@ macro(setup)
            set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-forward-unknown-to-host-compiler -extended-lambda -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS})
            list(APPEND CMAKE_CUDA_FLAGS)
            # See https://github.com/LLNL/RAJA/pull/1302
            # And https://github.com/LLNL/RAJA/pull/1339
            set(RAJA_ENABLE_VECTORIZATION OFF CACHE BOOL "")
            message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}")
        endif ()
--- a/src/thrust/ThrustStream.h
+++ b/src/thrust/ThrustStream.h
@ -8,7 +8,11 @@
 #include <iostream>
 #include <vector>
 #if defined(MANAGED)
 #include <thrust/universal_vector.h>
 #else
 #include <thrust/device_vector.h>
 #endif
 #include "Stream.h"
@ -21,9 +25,15 @@ class ThrustStream : public Stream<T>
    // Size of arrays
    int array_size;
  // Storage for the three arrays a, b and c.
  // When MANAGED is defined the arrays use thrust::universal_vector (declared in
  // <thrust/universal_vector.h>); otherwise they use thrust::device_vector.
  #if defined(MANAGED)
    thrust::universal_vector<T> a;
    thrust::universal_vector<T> b;
    thrust::universal_vector<T> c;
  #else
    thrust::device_vector<T> a;
    thrust::device_vector<T> b;
    thrust::device_vector<T> c;
  #endif
  public:
    ThrustStream(const int, int);
--- a/src/thrust/model.cmake
+++ b/src/thrust/model.cmake
@ -18,6 +18,9 @@ register_flag_optional(BACKEND
        "
        "CUDA")
      # Optional MANAGED flag, "OFF" unless overridden (presumably the last argument is the default; confirm against register_flag_optional).
      # When it evaluates true, setup() calls register_definitions(MANAGED),
      # which ThrustStream.h tests with `#if defined(MANAGED)`.
      register_flag_optional(MANAGED "Enabled managed memory mode."
        "OFF")
 register_flag_optional(CMAKE_CUDA_COMPILER
        "[THRUST_IMPL==CUDA] Path to the CUDA nvcc compiler"
        "")
@ -34,6 +37,9 @@ register_flag_optional(CUDA_EXTRA_FLAGS
 macro(setup)
    set(CMAKE_CXX_STANDARD 14)
    if (MANAGED)
      register_definitions(MANAGED)
    endif ()
    if (${THRUST_IMPL} STREQUAL "CUDA")