Merge branch 'develop' into option_for_vec

This commit is contained in:
Tom Lin 2023-09-05 02:44:43 +01:00
commit b25fd755a6
4 changed files with 39 additions and 3 deletions

View File

@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.
## Unreleased
### Added
- Ability to build Kokkos and RAJA versions against existing packages.
- Thrust managed memory.
### Changed
- RAJA CUDA CMake build issues resolved.

View File

@ -8,6 +8,8 @@ register_flag_optional(RAJA_IN_TREE
Make sure to use the release version of RAJA or clone RAJA recursively with submodules.
Remember to append RAJA specific flags as well, for example:
-DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ...
For RAJA >= v2022.03.0, remember to use the RAJA prefixed CMake options:
-DRAJA_IN_TREE=... -DRAJA_ENABLE_OPENMP=ON -DRAJA_ENABLE_CUDA=ON ...
See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options
" "")
@ -20,7 +22,7 @@ register_flag_optional(TARGET
CPU)
register_flag_optional(CUDA_TOOLKIT_ROOT_DIR
"[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the ENABLE_CUDA flag is specified for RAJA" "")
"[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the RAJA_ENABLE_CUDA or ENABLE_CUDA flag is specified for RAJA" "")
# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
register_flag_optional(CUDA_ARCH
@ -58,7 +60,20 @@ macro(setup)
set(ENABLE_BENCHMARKS OFF CACHE BOOL "")
set(ENABLE_CUDA ${ENABLE_CUDA} CACHE BOOL "" FORCE)
if (ENABLE_CUDA)
# RAJA >= v2022.03.0 switched to prefixed variables; we keep the legacy ones for backwards compatibility
set(RAJA_ENABLE_TESTS OFF CACHE BOOL "")
set(RAJA_ENABLE_EXAMPLES OFF CACHE BOOL "")
set(RAJA_ENABLE_REPRODUCERS OFF CACHE BOOL "")
set(RAJA_ENABLE_EXERCISES OFF CACHE BOOL "")
set(RAJA_ENABLE_DOCUMENTATION OFF CACHE BOOL "")
set(RAJA_ENABLE_BENCHMARKS OFF CACHE BOOL "")
set(RAJA_ENABLE_CUDA ${RAJA_ENABLE_CUDA} CACHE BOOL "" FORCE)
if (ENABLE_CUDA OR RAJA_ENABLE_CUDA)
# RAJA still needs ENABLE_CUDA for internal use, so if either is on, assert both.
set(RAJA_ENABLE_CUDA ON)
set(ENABLE_CUDA ON)
# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
if(POLICY CMP0104)
@ -70,6 +85,10 @@ macro(setup)
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-forward-unknown-to-host-compiler -extended-lambda -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS})
list(APPEND CMAKE_CUDA_FLAGS)
# See https://github.com/LLNL/RAJA/pull/1302
# And https://github.com/LLNL/RAJA/pull/1339
set(RAJA_ENABLE_VECTORIZATION OFF CACHE BOOL "")
message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}")
endif ()

View File

@ -8,7 +8,11 @@
#include <iostream>
#include <vector>
#if defined(MANAGED)
#include <thrust/universal_vector.h>
#else
#include <thrust/device_vector.h>
#endif
#include "Stream.h"
@ -21,9 +25,15 @@ class ThrustStream : public Stream<T>
// Size of arrays
int array_size;
// Storage for the three stream arrays. The container type is selected at
// compile time: when MANAGED is defined the arrays use thrust::universal_vector
// (declared in <thrust/universal_vector.h>, included above under the same
// guard); otherwise they use thrust::device_vector.
#if defined(MANAGED)
thrust::universal_vector<T> a;
thrust::universal_vector<T> b;
thrust::universal_vector<T> c;
#else
thrust::device_vector<T> a;
thrust::device_vector<T> b;
thrust::device_vector<T> c;
#endif
public:
ThrustStream(const int, int);

View File

@ -18,6 +18,9 @@ register_flag_optional(BACKEND
"
"CUDA")
# Declares the optional MANAGED flag with its help text.
# NOTE(review): the trailing "OFF" is presumably the default value — confirm against
# the definition of register_flag_optional, which is not visible here.
# setup() checks this flag to decide whether to register the MANAGED definition.
register_flag_optional(MANAGED "Enabled managed memory mode."
"OFF")
register_flag_optional(CMAKE_CUDA_COMPILER
"[THRUST_IMPL==CUDA] Path to the CUDA nvcc compiler"
"")
@ -34,6 +37,9 @@ register_flag_optional(CUDA_EXTRA_FLAGS
macro(setup)
set(CMAKE_CXX_STANDARD 14)
# When the MANAGED flag is truthy, register MANAGED via register_definitions.
# NOTE(review): register_definitions is defined elsewhere — presumably it adds a
# compile definition so that `#if defined(MANAGED)` in the Thrust stream header
# takes the universal_vector branch; confirm.
if (MANAGED)
register_definitions(MANAGED)
endif ()
if (${THRUST_IMPL} STREQUAL "CUDA")
@ -91,4 +97,4 @@ macro(setup)
endmacro()