Merge branch 'develop' into option_for_vec

This commit is contained in:
Tom Lin 2023-09-05 02:44:43 +01:00
commit b25fd755a6
4 changed files with 39 additions and 3 deletions

View File

@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.
## Unreleased ## Unreleased
### Added ### Added
- Ability to build Kokkos and RAJA versions against existing packages. - Ability to build Kokkos and RAJA versions against existing packages.
- Thrust managed memory.
### Changed ### Changed
- RAJA CUDA CMake build issues resolved. - RAJA CUDA CMake build issues resolved.

View File

@ -8,6 +8,8 @@ register_flag_optional(RAJA_IN_TREE
Make sure to use the release version of RAJA or clone RAJA recursively with submodules. Make sure to use the release version of RAJA or clone RAJA recursively with submodules.
Remember to append RAJA specific flags as well, for example: Remember to append RAJA specific flags as well, for example:
-DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ... -DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ...
For RAJA >= v2022.03.0, remember to use the RAJA prefixed CMake options:
-DRAJA_IN_TREE=... -DRAJA_ENABLE_OPENMP=ON -DRAJA_ENABLE_CUDA=ON ...
See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options
" "") " "")
@ -20,7 +22,7 @@ register_flag_optional(TARGET
CPU) CPU)
register_flag_optional(CUDA_TOOLKIT_ROOT_DIR register_flag_optional(CUDA_TOOLKIT_ROOT_DIR
"[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the ENABLE_CUDA flag is specified for RAJA" "") "[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the RAJA_ENABLE_CUDA or ENABLE_CUDA flag is specified for RAJA" "")
# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes # XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
register_flag_optional(CUDA_ARCH register_flag_optional(CUDA_ARCH
@ -58,7 +60,20 @@ macro(setup)
set(ENABLE_BENCHMARKS OFF CACHE BOOL "") set(ENABLE_BENCHMARKS OFF CACHE BOOL "")
set(ENABLE_CUDA ${ENABLE_CUDA} CACHE BOOL "" FORCE) set(ENABLE_CUDA ${ENABLE_CUDA} CACHE BOOL "" FORCE)
if (ENABLE_CUDA) # RAJA >= v2022.03.0 switched to prefixed variables, we keep the legacy ones for backwards compatibility
set(RAJA_ENABLE_TESTS OFF CACHE BOOL "")
set(RAJA_ENABLE_EXAMPLES OFF CACHE BOOL "")
set(RAJA_ENABLE_REPRODUCERS OFF CACHE BOOL "")
set(RAJA_ENABLE_EXERCISES OFF CACHE BOOL "")
set(RAJA_ENABLE_DOCUMENTATION OFF CACHE BOOL "")
set(RAJA_ENABLE_BENCHMARKS OFF CACHE BOOL "")
set(RAJA_ENABLE_CUDA ${RAJA_ENABLE_CUDA} CACHE BOOL "" FORCE)
if (ENABLE_CUDA OR RAJA_ENABLE_CUDA)
# RAJA still needs ENABLE_CUDA for internal use, so if either is on, assert both.
set(RAJA_ENABLE_CUDA ON)
set(ENABLE_CUDA ON)
# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes # XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
if(POLICY CMP0104) if(POLICY CMP0104)
@ -70,6 +85,10 @@ macro(setup)
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-forward-unknown-to-host-compiler -extended-lambda -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS}) set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-forward-unknown-to-host-compiler -extended-lambda -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS})
list(APPEND CMAKE_CUDA_FLAGS) list(APPEND CMAKE_CUDA_FLAGS)
# See https://github.com/LLNL/RAJA/pull/1302
# And https://github.com/LLNL/RAJA/pull/1339
set(RAJA_ENABLE_VECTORIZATION OFF CACHE BOOL "")
message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}") message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}")
endif () endif ()

View File

@ -8,7 +8,11 @@
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#if defined(MANAGED)
#include <thrust/universal_vector.h>
#else
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#endif
#include "Stream.h" #include "Stream.h"
@ -21,9 +25,15 @@ class ThrustStream : public Stream<T>
// Size of arrays // Size of arrays
int array_size; int array_size;
#if defined(MANAGED)
thrust::universal_vector<T> a;
thrust::universal_vector<T> b;
thrust::universal_vector<T> c;
#else
thrust::device_vector<T> a; thrust::device_vector<T> a;
thrust::device_vector<T> b; thrust::device_vector<T> b;
thrust::device_vector<T> c; thrust::device_vector<T> c;
#endif
public: public:
ThrustStream(const int, int); ThrustStream(const int, int);

View File

@ -18,6 +18,9 @@ register_flag_optional(BACKEND
" "
"CUDA") "CUDA")
register_flag_optional(MANAGED "Enable managed memory mode."
"OFF")
register_flag_optional(CMAKE_CUDA_COMPILER register_flag_optional(CMAKE_CUDA_COMPILER
"[THRUST_IMPL==CUDA] Path to the CUDA nvcc compiler" "[THRUST_IMPL==CUDA] Path to the CUDA nvcc compiler"
"") "")
@ -34,6 +37,9 @@ register_flag_optional(CUDA_EXTRA_FLAGS
macro(setup) macro(setup)
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 14)
if (MANAGED)
register_definitions(MANAGED)
endif ()
if (${THRUST_IMPL} STREQUAL "CUDA") if (${THRUST_IMPL} STREQUAL "CUDA")
@ -91,4 +97,4 @@ macro(setup)
endmacro() endmacro()