Merge branch 'develop' into option_for_vec

2023-09-05 02:44:43 +01:00 · 2023-09-05 02:44:43 +01:00 · b25fd755a6
commit b25fd755a6
parent 28dcf6f962 6d11c72382
4 changed files with 39 additions and 3 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.
 ## Unreleased
 ### Added
 - Ability to build Kokkos and RAJA versions against existing packages.
+- Thrust managed memory.

 ### Changed
 - RAJA CUDA CMake build issues resolved.
--- a/src/raja/model.cmake
+++ b/src/raja/model.cmake
@ -8,6 +8,8 @@ register_flag_optional(RAJA_IN_TREE
         Make sure to use the release version of RAJA or clone RAJA recursively with submodules.
         Remember to append RAJA specific flags as well, for example:
             -DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ...
+         For RAJA >= v2022.03.0, remember to use the RAJA prefixed CMake options:
+             -DRAJA_IN_TREE=... -DRAJA_ENABLE_OPENMP=ON -DRAJA_ENABLE_CUDA=ON ...
         See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options
 " "")

@ -20,7 +22,7 @@ register_flag_optional(TARGET
        CPU)

 register_flag_optional(CUDA_TOOLKIT_ROOT_DIR
-        "[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the ENABLE_CUDA flag is specified for RAJA" "")
+        "[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the RAJA_ENABLE_CUDA or ENABLE_CUDA flag is specified for RAJA" "")

 # XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
 register_flag_optional(CUDA_ARCH
@ -58,7 +60,20 @@ macro(setup)
        set(ENABLE_BENCHMARKS OFF CACHE BOOL "")
        set(ENABLE_CUDA ${ENABLE_CUDA} CACHE BOOL "" FORCE)

-        if (ENABLE_CUDA)
+        # RAJA >= v2022.03.0 switched to prefixed variables; we keep the legacy ones for backwards compatibility
+        set(RAJA_ENABLE_TESTS OFF CACHE BOOL "")
+        set(RAJA_ENABLE_EXAMPLES OFF CACHE BOOL "")
+        set(RAJA_ENABLE_REPRODUCERS OFF CACHE BOOL "")
+        set(RAJA_ENABLE_EXERCISES OFF CACHE BOOL "")
+        set(RAJA_ENABLE_DOCUMENTATION OFF CACHE BOOL "")
+        set(RAJA_ENABLE_BENCHMARKS OFF CACHE BOOL "")
+        set(RAJA_ENABLE_CUDA ${RAJA_ENABLE_CUDA} CACHE BOOL "" FORCE)
+
+        if (ENABLE_CUDA OR RAJA_ENABLE_CUDA)
+
+            # RAJA still needs ENABLE_CUDA for internal use, so if either is on, assert both.
+            set(RAJA_ENABLE_CUDA ON)
+            set(ENABLE_CUDA ON)

            # XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
            if(POLICY CMP0104)
@ -70,6 +85,10 @@ macro(setup)
            set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-forward-unknown-to-host-compiler -extended-lambda -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS})
            list(APPEND CMAKE_CUDA_FLAGS)

+            # See https://github.com/LLNL/RAJA/pull/1302
+            # And https://github.com/LLNL/RAJA/pull/1339
+            set(RAJA_ENABLE_VECTORIZATION OFF CACHE BOOL "")
+
            message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}")
        endif ()

--- a/src/thrust/ThrustStream.h
+++ b/src/thrust/ThrustStream.h
@ -8,7 +8,11 @@

 #include <iostream>
 #include <vector>
+#if defined(MANAGED)
+#include <thrust/universal_vector.h>
+#else
 #include <thrust/device_vector.h>
+#endif

 #include "Stream.h"

@ -21,9 +25,15 @@ class ThrustStream : public Stream<T>
    // Size of arrays
    int array_size;

+  #if defined(MANAGED) // MANAGED build: arrays are held in thrust::universal_vector
+    thrust::universal_vector<T> a;
+    thrust::universal_vector<T> b;
+    thrust::universal_vector<T> c;
+  #else // default build: arrays are held in thrust::device_vector
    thrust::device_vector<T> a;
    thrust::device_vector<T> b;
    thrust::device_vector<T> c;
+  #endif // MANAGED

  public:
    ThrustStream(const int, int);
--- a/src/thrust/model.cmake
+++ b/src/thrust/model.cmake
@ -18,6 +18,9 @@ register_flag_optional(BACKEND
        "
        "CUDA")

+register_flag_optional(MANAGED "Enabled managed memory mode."
+        "OFF")
+
 register_flag_optional(CMAKE_CUDA_COMPILER
        "[THRUST_IMPL==CUDA] Path to the CUDA nvcc compiler"
        "")
@ -34,6 +37,9 @@ register_flag_optional(CUDA_EXTRA_FLAGS

 macro(setup)
    set(CMAKE_CXX_STANDARD 14)
+    if (MANAGED)
+      register_definitions(MANAGED)
+    endif ()

    if (${THRUST_IMPL} STREQUAL "CUDA")

@ -91,4 +97,4 @@ macro(setup)
 endmacro()


- 
+