Merge branch 'develop' of github.com:UoB-HPC/BabelStream into develop

This commit is contained in:
Tom Lin 2022-10-01 04:57:28 +08:00
commit 2c5eee4840
5 changed files with 37 additions and 21 deletions

View File

@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file. All notable changes to this project will be documented in this file.
## Unreleased ## Unreleased
### Added
- Ability to build Kokkos and RAJA versions against existing packages.
### Changed
- RAJA CUDA CMake build issues resolved.
- Fix CUDA memory limit check. - Fix CUDA memory limit check.
- Use long double for `check_solution` in case of large problem size. - Use long double for `check_solution` in case of large problem size.

View File

@ -125,7 +125,7 @@ message(STATUS "Default ${CMAKE_BUILD_TYPE} flags are `${DEFAULT_${BUILD_TYPE}_F
# setup common build flag defaults if there are no overrides # setup common build flag defaults if there are no overrides
if (NOT DEFINED ${BUILD_TYPE}_FLAGS) if (NOT DEFINED ${BUILD_TYPE}_FLAGS)
set(ACTUAL_${BUILD_TYPE}_FLAGS ${DEFAULT_${BUILD_TYPE}_FLAGS}) set(ACTUAL_${BUILD_TYPE}_FLAGS ${DEFAULT_${BUILD_TYPE}_FLAGS})
elseif () else ()
set(ACTUAL_${BUILD_TYPE}_FLAGS ${${BUILD_TYPE}_FLAGS}) set(ACTUAL_${BUILD_TYPE}_FLAGS ${${BUILD_TYPE}_FLAGS})
endif () endif ()

View File

@ -100,7 +100,7 @@ The source for each model's implementations are located in `./src/<model>`.
Currently available models are: Currently available models are:
``` ```
omp;ocl;std;std20;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust omp;ocl;std-data;std-indices;std-ranges;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust
``` ```
#### Overriding default flags #### Overriding default flags
@ -163,11 +163,11 @@ Pull Requests should be made against the `develop` branch.
Please cite BabelStream via this reference: Please cite BabelStream via this reference:
Deakin T, Price J, Martineau M, McIntosh-Smith S. GPU-STREAM v2.0: Benchmarking the achievable memory bandwidth of many-core processors across diverse parallel programming models. 2016. Paper presented at P^3MA Workshop at ISC High Performance, Frankfurt, Germany. DOI: 10.1007/978-3-319-46079-6_34 Deakin T, Price J, Martineau M, McIntosh-Smith S. Evaluating attainable memory bandwidth of parallel programming models via BabelStream. International Journal of Computational Science and Engineering. Special issue. Vol. 17, No. 3, pp. 247–262. 2018. DOI: 10.1504/IJCSE.2018.095847
### Other BabelStream publications ### Other BabelStream publications
* Deakin T, Price J, Martineau M, McIntosh-Smith S. Evaluating attainable memory bandwidth of parallel programming models via BabelStream. International Journal of Computational Science and Engineering. Special issue. Vol. 17, No. 3, pp. 247–262. 2018. DOI: 10.1504/IJCSE.2018.095847 * Deakin T, Price J, Martineau M, McIntosh-Smith S. GPU-STREAM v2.0: Benchmarking the achievable memory bandwidth of many-core processors across diverse parallel programming models. 2016. Paper presented at P^3MA Workshop at ISC High Performance, Frankfurt, Germany. DOI: 10.1007/978-3-319-46079-6_34
* Deakin T, McIntosh-Smith S. GPU-STREAM: Benchmarking the achievable memory bandwidth of Graphics Processing Units. 2015. Poster session presented at IEEE/ACM SuperComputing, Austin, United States. * Deakin T, McIntosh-Smith S. GPU-STREAM: Benchmarking the achievable memory bandwidth of Graphics Processing Units. 2015. Poster session presented at IEEE/ACM SuperComputing, Austin, United States.
You can view the [Poster and Extended Abstract](http://sc15.supercomputing.org/sites/all/themes/SC15images/tech_poster/tech_poster_pages/post150.html). You can view the [Poster and Extended Abstract](http://sc15.supercomputing.org/sites/all/themes/SC15images/tech_poster/tech_poster_pages/post150.html).

View File

@ -1,16 +1,17 @@
register_flag_optional(CMAKE_CXX_COMPILER register_flag_optional(CMAKE_CXX_COMPILER
"Any CXX compiler that is supported by CMake detection and RAJA. "Any CXX compiler that is supported by CMake detection and RAJA.
See https://github.com/kokkos/kokkos#primary-tested-compilers-on-x86-are" See https://github.com/kokkos/kokkos#primary-tested-compilers-on-x86-are"
"c++") "c++")
register_flag_required(KOKKOS_IN_TREE register_flag_optional(KOKKOS_IN_TREE
"Absolute path to the *source* distribution directory of Kokkos. "Absolute path to the *source* distribution directory of Kokkos.
Remember to append Kokkos specific flags as well, for example: Remember to append Kokkos specific flags as well, for example:
-DKOKKOS_IN_TREE=... -DKokkos_ENABLE_OPENMP=ON -DKokkos_ARCH_ZEN=ON ... -DKOKKOS_IN_TREE=... -DKokkos_ENABLE_OPENMP=ON -DKokkos_ARCH_ZEN=ON ...
See https://github.com/kokkos/kokkos/blob/master/BUILD.md for all available options" "")
See https://github.com/kokkos/kokkos/blob/master/BUILD.md for all available options") register_flag_optional(KOKKOS_IN_PACKAGE
"Use if Kokkos is part of a package dependency:
Path to package R-Path containing Kokkos libs" "")
# compiler vendor and arch specific flags # compiler vendor and arch specific flags
set(KOKKOS_FLAGS_CPU_INTEL -qopt-streaming-stores=always) set(KOKKOS_FLAGS_CPU_INTEL -qopt-streaming-stores=always)
@ -20,13 +21,18 @@ macro(setup)
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 14)
cmake_policy(SET CMP0074 NEW) #see https://github.com/kokkos/kokkos/blob/master/BUILD.md cmake_policy(SET CMP0074 NEW) #see https://github.com/kokkos/kokkos/blob/master/BUILD.md
message(STATUS "Building using in-tree Kokkos source at `${KOKKOS_IN_TREE}`")
if (EXISTS "${KOKKOS_IN_TREE}") if (EXISTS "${KOKKOS_IN_TREE}")
message(STATUS "Building using in-tree Kokkos source at `${KOKKOS_IN_TREE}`")
add_subdirectory(${KOKKOS_IN_TREE} ${CMAKE_BINARY_DIR}/kokkos) add_subdirectory(${KOKKOS_IN_TREE} ${CMAKE_BINARY_DIR}/kokkos)
register_link_library(Kokkos::kokkos) register_link_library(Kokkos::kokkos)
elseif (EXISTS "${KOKKOS_IN_PACKAGE}")
message(STATUS "Building using packaged Kokkos at `${KOKKOS_IN_PACKAGE}`")
set (Kokkos_DIR "${KOKKOS_IN_PACKAGE}/lib64/cmake/Kokkos")
find_package(Kokkos REQUIRED)
register_link_library(Kokkos::kokkos)
else() else()
message(FATAL_ERROR "`${KOKKOS_IN_TREE}` does not exist") message(FATAL_ERROR "Neither `${KOKKOS_IN_TREE}`, or `${KOKKOS_IN_PACKAGE}` exists")
endif () endif ()
register_append_compiler_and_arch_specific_cxx_flags( register_append_compiler_and_arch_specific_cxx_flags(
@ -36,5 +42,3 @@ macro(setup)
) )
endmacro() endmacro()

View File

@ -1,18 +1,19 @@
register_flag_optional(CMAKE_CXX_COMPILER register_flag_optional(CMAKE_CXX_COMPILER
"Any CXX compiler that is supported by CMake detection and RAJA. "Any CXX compiler that is supported by CMake detection and RAJA.
See https://raja.readthedocs.io/en/main/getting_started.html#build-and-install" See https://raja.readthedocs.io/en/main/getting_started.html#build-and-install"
"c++") "c++")
register_flag_required(RAJA_IN_TREE register_flag_optional(RAJA_IN_TREE
"Absolute path to the *source* distribution directory of RAJA. "Absolute path to the *source* distribution directory of RAJA.
Make sure to use the release version of RAJA or clone RAJA recursively with submodules. Make sure to use the release version of RAJA or clone RAJA recursively with submodules.
Remember to append RAJA specific flags as well, for example: Remember to append RAJA specific flags as well, for example:
-DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ... -DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ...
See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options
") " "")
register_flag_optional(RAJA_IN_PACKAGE
"Use if Raja is part of a package dependency:
Path to installation" "")
register_flag_optional(TARGET register_flag_optional(TARGET
"Target offload device, implemented values are CPU, NVIDIA" "Target offload device, implemented values are CPU, NVIDIA"
@ -76,16 +77,22 @@ macro(setup)
register_link_library(RAJA) register_link_library(RAJA)
# RAJA's cmake screws with where the binary will end up, resetting it here: # RAJA's cmake screws with where the binary will end up, resetting it here:
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
elseif (EXISTS "${RAJA_IN_PACKAGE}")
message(STATUS "Building using packaged Raja at `${RAJA_IN_PACKAGE}`")
find_package(RAJA REQUIRED)
register_link_library(RAJA)
else () else ()
message(FATAL_ERROR "`${RAJA_IN_TREE}` does not exist") message(FATAL_ERROR "Neither `${RAJA_IN_TREE}` or `${RAJA_IN_PACKAGE}` exists")
endif () endif ()
if (ENABLE_CUDA) if (ENABLE_CUDA)
# RAJA needs the codebase to be compiled with nvcc, so we tell cmake to treat sources as *.cu # RAJA needs the codebase to be compiled with nvcc, so we tell cmake to treat sources as *.cu
enable_language(CUDA) enable_language(CUDA)
set_source_files_properties(RAJAStream.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(src/raja/RAJAStream.cpp PROPERTIES LANGUAGE CUDA)
set_source_files_properties(main.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(src/main.cpp PROPERTIES LANGUAGE CUDA)
endif () endif ()