Use ./src instead of ./cpp

Create subdir for each cpp-based implementation
This commit is contained in:
Tom Lin 2021-05-26 17:46:07 +01:00
parent 60d4cb8c87
commit 5318404249
76 changed files with 163 additions and 196 deletions

View File

@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-18.04 runs-on: ubuntu-18.04
defaults: defaults:
run: run:
working-directory: ./cpp working-directory: ./src
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
@ -16,7 +16,7 @@ jobs:
uses: actions/cache@v2 uses: actions/cache@v2
with: with:
path: compilers path: compilers
key: ${{ runner.os }}-${{ hashFiles('./cpp/ci-prepare-bionic.sh') }} key: ${{ runner.os }}-${{ hashFiles('./src/ci-prepare-bionic.sh') }}
- name: Prepare compilers - name: Prepare compilers
if: steps.prepare-compilers.outputs.cache-hit != 'true' if: steps.prepare-compilers.outputs.cache-hit != 'true'

View File

@ -63,20 +63,22 @@ The project supports building with CMake >= 3.13.0, it can be installed without
As with any CMake project, first configure the project: As with any CMake project, first configure the project:
```shell ```shell
> cd babelstream/cpp > cd babelstream/src
> cmake -Bbuild -H. -DMODEL=<model> <model specific flags prefixed with -D...> # configure the build, build type defaults to Release > cmake -Bbuild -H. -DMODEL=<model> <model specific flags prefixed with -D...> # configure the build, build type defaults to Release
> cmake --build build # compile it > cmake --build build # compile it
> ./build/babelstream # executable available at ./build/ > ./build/<model>-stream # executable available at ./build/
``` ```
The source for each model's implementation is located in `./src/<model>`.
By default, we have defined a set of optimal flags for known HPC compilers. By default, we have defined a set of optimal flags for known HPC compilers.
These are assigned to `RELEASE_FLAGS`, and you can override them if required. These are assigned to `RELEASE_FLAGS`, and you can override them if required.
To find out what flag each model supports or requires, simply configure while only specifying the model. To find out what flag each model supports or requires, simply configure while only specifying the model.
For example: For example:
```shell ```shell
> cd babelstream/cpp > cd babelstream/src
> cmake -Bbuild -H. -DMODEL=OCL > cmake -Bbuild -H. -DMODEL=ocl
... ...
- Common Release flags are `-O3`, set RELEASE_FLAGS to override - Common Release flags are `-O3`, set RELEASE_FLAGS to override
-- CXX_EXTRA_FLAGS: -- CXX_EXTRA_FLAGS:
@ -90,8 +92,8 @@ For example:
Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`) Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`)
-- CXX_EXTRA_LINKER_FLAGS: -- CXX_EXTRA_LINKER_FLAGS:
Append to linker flags (i.e GCC's `-Wl` or equivalent) Append to linker flags (i.e GCC's `-Wl` or equivalent)
-- Available models: OMP;OCL;STD;STD20;HIP;CUDA;KOKKOS;SYCL;ACC;RAJA -- Available models: omp;ocl;std;std20;hip;cuda;kokkos;sycl;acc;raja
-- Selected model : OCL -- Selected model : ocl
-- Supported flags: -- Supported flags:
CMAKE_CXX_COMPILER (optional, default=c++): Any CXX compiler that is supported by CMake detection CMAKE_CXX_COMPILER (optional, default=c++): Any CXX compiler that is supported by CMake detection
@ -107,7 +109,7 @@ Alternatively, refer to the [CI script](./ci-test-compile.sh), which test-compil
We have supplied a series of Makefiles, one for each programming model, to assist with building. We have supplied a series of Makefiles, one for each programming model, to assist with building.
The Makefiles contain common build options, and should be simple to customise for your needs too. The Makefiles contain common build options, and should be simple to customise for your needs too.
General usage is `make -f <Model>.make` General usage is `make -C src/<model>`
Common compiler flags and names can be set by passing a `COMPILER` option to Make, e.g. `make COMPILER=GNU`. Common compiler flags and names can be set by passing a `COMPILER` option to Make, e.g. `make COMPILER=GNU`.
Some models allow specifying a CPU or GPU style target, and this can be set by passing a `TARGET` option to Make, e.g. `make TARGET=GPU`. Some models allow specifying a CPU or GPU style target, and this can be set by passing a `TARGET` option to Make, e.g. `make TARGET=GPU`.
@ -125,7 +127,7 @@ cd
wget https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz wget https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz
tar -xvf 3.1.01.tar.gz # should end up with ~/kokkos-3.1.01 tar -xvf 3.1.01.tar.gz # should end up with ~/kokkos-3.1.01
cd BabelStream cd BabelStream
make -f Kokkos.make KOKKOS_PATH=~/kokkos-3.1.01 make -C src/kokkos KOKKOS_PATH=~/kokkos-3.1.01
``` ```
See make output for more information on supported flags. See make output for more information on supported flags.

30
cpp/.gitignore vendored
View File

@ -1,30 +0,0 @@
cuda-stream
ocl-stream
omp-stream
acc-stream
raja-stream
kokkos-stream
std-stream
sycl-stream
hip-stream
*.o
*.bc
*.sycl
*.tar
*.gz
*.a
KokkosCore_config.*
.DS_Store
Makefile
build/
cmake-build-*/
CMakeFiles/
.idea/
.vscode/
.directory

View File

@ -14,7 +14,8 @@ endif
hc-stream: ../main.cpp HCStream.cpp hc-stream: ../main.cpp HCStream.cpp
$(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@ $(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean:

29
src/.gitignore vendored Normal file
View File

@ -0,0 +1,29 @@
**/cuda-stream
**/ocl-stream
**/omp-stream
**/acc-stream
**/raja-stream
**/kokkos-stream
**/std-stream
**/sycl-stream
**/hip-stream
**/*.o
**/*.bc
**/*.sycl
**/*.tar
**/*.gz
**/*.a
**/KokkosCore_Config_*
**/.DS_Store
build/
cmake-build-*/
CMakeFiles/
.idea/
.vscode/
.directory

View File

@ -8,46 +8,7 @@ project(BabelStream VERSION 3.5 LANGUAGES CXX)
set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
#set(MODEL SYCL)
#set(SYCL_COMPILER COMPUTECPP)
#set(SYCL_COMPILER_DIR /home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu)
#set(MODEL RAJA)
#set(RAJA_IN_TREE /home/tom/Downloads/RAJA-v0.13.0/)
#set(ENABLE_CUDA ON)
#set(TARGET NVIDIA)
#set(CUDA_TOOLKIT_ROOT_DIR /opt/cuda-11.2)
#set(CUDA_ARCH sm_70)
#set(BLT_DIR /home/tom/Downloads/blt-0.3.6/)
#set(MODEL STD)
#set(ARCH cc70)
#set(CXX_EXTRA_FLAGS -v)
#set(MODEL CUDA)
#set(ARCH sm_70)
#set(CMAKE_CUDA_COMPILER /opt/cuda-11.2/bin/nvcc)
#set(MODEL OCL)
#set(OpenCL_LIBRARY /opt/rocm-4.0.0/opencl/lib/libOpenCL.so)
#set(OpenCL_INCLUDE_DIR /opt/rocm-4.0.0/opencl/lib)
#set(RELEASE_FLAGS -Ofast)
#set(CXX_EXTRA_FLAGS -O2)
#set(CMAKE_CXX_COMPILER /usr/lib/aomp/bin/clang++)
#set(MODEL OMP)
##set(OFFLOAD "AMD:gfx803")
#set(OFFLOAD "NVIDIA:sm_35")
#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/)
#set(OFFLOAD "AMD:_70")
#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/ --gcc-toolchain=/home/tom/spack/opt/spack/linux-fedora33-zen2/gcc-10.2.1/gcc-8.3.0-latmjo2hl2yv53255xkwko7k3y7bx2vv)
#set(CXX_EXTRA_LINKER_FLAGS )
#set(MODEL HIP)
#set(MODEL KOKKOS)
#set(KOKKOS_IN_TREE /home/tom/Downloads/kokkos-3.3.00/)
# the final executable name # the final executable name
set(EXE_NAME babelstream) set(EXE_NAME babelstream)
@ -101,17 +62,17 @@ endif ()
include(register_models.cmake) include(register_models.cmake)
# register our models <model_name> <preprocessor_def_name> <source files...> # register our models <model_name> <preprocessor_def_name> <source files...>
register_model(OMP OMP OMPStream.cpp) register_model(omp OMP OMPStream.cpp)
register_model(OCL OCL OCLStream.cpp) register_model(ocl OCL OCLStream.cpp)
register_model(STD STD STDStream.cpp) register_model(std STD STDStream.cpp)
register_model(STD20 STD20 STD20Stream.cpp) register_model(std20 STD20 STD20Stream.cpp)
register_model(HIP HIP HIPStream.cpp) register_model(hip HIP HIPStream.cpp)
register_model(CUDA CUDA CUDAStream.cu) register_model(cuda CUDA CUDAStream.cu)
register_model(KOKKOS KOKKOS KokkosStream.cpp) register_model(kokkos KOKKOS KokkosStream.cpp)
register_model(SYCL SYCL SYCLStream.cpp) register_model(sycl SYCL SYCLStream.cpp)
register_model(ACC ACC ACCStream.cpp) register_model(acc ACC ACCStream.cpp)
# defining RAJA collides with the RAJA namespace so USE_RAJA # defining RAJA collides with the RAJA namespace so USE_RAJA
register_model(RAJA USE_RAJA RAJAStream.cpp) register_model(raja USE_RAJA RAJAStream.cpp)
set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model") set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model")
@ -169,6 +130,7 @@ message(STATUS "Executable : ${EXE_NAME}")
# below we have all the usual CMake target setup steps # below we have all the usual CMake target setup steps
include_directories(.)
add_executable(${EXE_NAME} ${IMPL_SOURCES} main.cpp) add_executable(${EXE_NAME} ${IMPL_SOURCES} main.cpp)
target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES}) target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES})
target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS}) target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS})
@ -184,7 +146,7 @@ target_link_options(${EXE_NAME} PUBLIC LINKER:${CXX_EXTRA_LINKER_FLAGS})
target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS}) target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS})
# some models require the target to be already specified so they can finish their setup here # some models require the target to be already specified so they can finish their setup here
# this only happens if the MODEL.cmake definition contains the `setup_target` macro # this only happens if the model.cmake definition contains the `setup_target` macro
if (COMMAND setup_target) if (COMMAND setup_target)
setup_target(${EXE_NAME}) setup_target(${EXE_NAME})
endif () endif ()

View File

@ -50,8 +50,8 @@ endif
FLAGS_GNU = -O3 -std=c++11 -Drestrict=__restrict -fopenacc FLAGS_GNU = -O3 -std=c++11 -Drestrict=__restrict -fopenacc
CXXFLAGS = $(FLAGS_$(COMPILER)) CXXFLAGS = $(FLAGS_$(COMPILER))
acc-stream: main.cpp ACCStream.cpp acc-stream: ../main.cpp ACCStream.cpp
$(COMPILER_$(COMPILER)) $(CXXFLAGS) -DACC $^ $(EXTRA_FLAGS) -o $@ $(COMPILER_$(COMPILER)) $(CXXFLAGS) -DACC $^ $(EXTRA_FLAGS) -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean:

View File

@ -86,38 +86,38 @@ run_build() {
} }
### ###
#KOKKOS_SRC="/home/tom/Downloads/kokkos-3.3.00" # KOKKOS_SRC="/home/tom/Downloads/kokkos-3.3.00"
#RAJA_SRC="/home/tom/Downloads/RAJA-v0.13.0" # RAJA_SRC="/home/tom/Downloads/RAJA-v0.13.0"
#
#GCC_CXX="/usr/bin/g++" # GCC_CXX="/usr/bin/g++"
#CLANG_CXX="/usr/bin/clang++" # CLANG_CXX="/usr/bin/clang++"
#
#NVSDK="/home/tom/Downloads/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2/" # NVSDK="/home/tom/Downloads/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2/"
#NVHPC_NVCXX="$NVSDK/compilers/bin/nvc++" # NVHPC_NVCXX="$NVSDK/compilers/bin/nvc++"
#NVHPC_NVCC="$NVSDK/cuda/11.2/bin/nvcc" # NVHPC_NVCC="$NVSDK/cuda/11.2/bin/nvcc"
#NVHPC_CUDA_DIR="$NVSDK/cuda/11.2" # NVHPC_CUDA_DIR="$NVSDK/cuda/11.2"
#"$NVSDK/compilers/bin/makelocalrc" "$NVSDK/compilers/bin/" -x # "$NVSDK/compilers/bin/makelocalrc" "$NVSDK/compilers/bin/" -x
#
#AOCC_CXX="/opt/AMD/aocc-compiler-2.3.0/bin/clang++" # AOCC_CXX="/opt/AMD/aocc-compiler-2.3.0/bin/clang++"
#AOMP_CXX="/usr/lib/aomp/bin/clang++" # AOMP_CXX="/usr/lib/aomp/bin/clang++"
#OCL_LIB="/home/tom/Downloads/oclcpuexp-2020.11.11.0.04_rel/x64/libOpenCL.so" # OCL_LIB="/home/tom/Downloads/oclcpuexp-2020.11.11.0.04_rel/x64/libOpenCL.so"
#
## AMD needs this rocm_path thing exported... # # AMD needs this rocm_path thing exported...
#export ROCM_PATH="/opt/rocm-4.0.0" # export ROCM_PATH="/opt/rocm-4.0.0"
#HIP_CXX="/opt/rocm-4.0.0/bin/hipcc" # HIP_CXX="/opt/rocm-4.0.0/bin/hipcc"
#COMPUTECPP_DIR="/home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu" # COMPUTECPP_DIR="/home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu"
#DPCPP_DIR="/home/tom/Downloads/dpcpp_compiler" # DPCPP_DIR="/home/tom/Downloads/dpcpp_compiler"
#HIPSYCL_DIR="/opt/hipsycl/cff515c/" # HIPSYCL_DIR="/opt/hipsycl/cff515c/"
#
#ICPX_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/icpx" # ICPX_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/icpx"
#ICPC_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/intel64/icpc" # ICPC_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/intel64/icpc"
#
#GCC_STD_PAR_LIB="tbb" # GCC_STD_PAR_LIB="tbb"
#CLANG_STD_PAR_LIB="tbb" # CLANG_STD_PAR_LIB="tbb"
#GCC_OMP_OFFLOAD_AMD=false # GCC_OMP_OFFLOAD_AMD=false
#GCC_OMP_OFFLOAD_NVIDIA=true # GCC_OMP_OFFLOAD_NVIDIA=true
#CLANG_OMP_OFFLOAD_AMD=false # CLANG_OMP_OFFLOAD_AMD=false
#CLANG_OMP_OFFLOAD_NVIDIA=false # CLANG_OMP_OFFLOAD_NVIDIA=false
### ###
AMD_ARCH="gfx_903" AMD_ARCH="gfx_903"
@ -128,39 +128,39 @@ build_gcc() {
local name="gcc_build" local name="gcc_build"
local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}" local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}"
run_build $name "${GCC_CXX:?}" OMP "$cxx" run_build $name "${GCC_CXX:?}" omp "$cxx"
if [ "$MODEL" = "all" ] || [ "$MODEL" = "OMP" ]; then if [ "$MODEL" = "all" ] || [ "$MODEL" = "OMP" ]; then
# sanity check that it at least runs # sanity check that it at least runs
echo "Sanity checking GCC OMP build..." echo "Sanity checking GCC omp build..."
"./$BUILD_DIR/OMP_$name/omp-stream" -s 1048576 -n 10 "./$BUILD_DIR/omp_$name/omp-stream" -s 1048576 -n 10
fi fi
# some distributions like Ubuntu bionic implement std par with TBB, so conditionally link it here # some distributions like Ubuntu bionic implement std par with TBB, so conditionally link it here
run_build $name "${GCC_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" run_build $name "${GCC_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
run_build $name "${GCC_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" run_build $name "${GCC_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then
run_build "amd_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa" run_build "amd_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa"
run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH" run_build "amd_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=AMD:$AMD_ARCH"
fi fi
if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then
run_build "nvidia_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none" run_build "nvidia_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none"
run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
fi fi
run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
# run_build $name "${CC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON" # run_build $name "${CC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON"
run_build "cuda_$name" "${GCC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" run_build "cuda_$name" "${GCC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
run_build $name "${GCC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" run_build $name "${GCC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
run_build $name "${GCC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" run_build $name "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
# FIXME fails due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100102 # FIXME fails due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100102
# FIXME we also got https://github.com/NVIDIA/nccl/issues/494 # FIXME we also got https://github.com/NVIDIA/nccl/issues/494
# run_build "cuda_$name" "${GCC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \ # run_build "cuda_$name" "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \
# -DENABLE_CUDA=ON \ # -DENABLE_CUDA=ON \
# -DTARGET=NVIDIA \ # -DTARGET=NVIDIA \
# -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ # -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \
@ -171,46 +171,46 @@ build_gcc() {
build_clang() { build_clang() {
local name="clang_build" local name="clang_build"
local cxx="-DCMAKE_CXX_COMPILER=${CLANG_CXX:?}" local cxx="-DCMAKE_CXX_COMPILER=${CLANG_CXX:?}"
run_build $name "${CLANG_CXX:?}" OMP "$cxx" run_build $name "${CLANG_CXX:?}" omp "$cxx"
if [ "${CLANG_OMP_OFFLOAD_AMD:-false}" != "false" ]; then if [ "${CLANG_OMP_OFFLOAD_AMD:-false}" != "false" ]; then
run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH" run_build "amd_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=AMD:$AMD_ARCH"
fi fi
if [ "${CLANG_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then if [ "${CLANG_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then
run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
fi fi
run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
run_build $name "${CLANG_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
run_build $name "${CLANG_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
run_build $name "${CLANG_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" run_build $name "${CLANG_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
# run_build $name "${LANG_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported # run_build $name "${LANG_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported
run_build $name "${CLANG_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
# no clang /w RAJA+cuda because it needs nvcc which needs gcc # no clang /w RAJA+cuda because it needs nvcc which needs gcc
} }
build_nvhpc() { build_nvhpc() {
local name="nvhpc_build" local name="nvhpc_build"
local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}" local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}"
run_build $name "${NVHPC_NVCXX:?}" STD "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY" run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY"
run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen" run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen"
} }
build_aocc() { build_aocc() {
run_build aocc_build "${AOCC_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOCC_CXX:?}" run_build aocc_build "${AOCC_CXX:?}" omp "-DCMAKE_CXX_COMPILER=${AOCC_CXX:?}"
} }
build_aomp() { build_aomp() {
run_build aomp_amd_build "${AOMP_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=AMD:gfx906" run_build aomp_amd_build "${AOMP_CXX:?}" omp "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=AMD:gfx906"
#run_build aomp_nvidia_build "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=NVIDIA:$NV_ARCH" #run_build aomp_nvidia_build "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=NVIDIA:$NV_ARCH"
} }
build_hip() { build_hip() {
run_build hip_build "${HIP_CXX:?}" HIP "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}" run_build hip_build "${HIP_CXX:?}" hip "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}"
} }
build_icpx() { build_icpx() {
@ -218,7 +218,7 @@ build_icpx() {
set +u set +u
source /opt/intel/oneapi/setvars.sh -force || true source /opt/intel/oneapi/setvars.sh -force || true
set -u set -u
run_build intel_build "${ICPX_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${ICPX_CXX:?} -DOFFLOAD=INTEL" run_build intel_build "${ICPX_CXX:?}" omp "-DCMAKE_CXX_COMPILER=${ICPX_CXX:?} -DOFFLOAD=INTEL"
} }
build_icpc() { build_icpc() {
@ -228,31 +228,31 @@ build_icpc() {
set -u set -u
local name="intel_build" local name="intel_build"
local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}" local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}"
run_build $name "${ICPC_CXX:?}" OMP "$cxx" run_build $name "${ICPC_CXX:?}" omp "$cxx"
run_build $name "${ICPC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" run_build $name "${ICPC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
run_build $name "${ICPC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" run_build $name "${ICPC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
run_build $name "${ICPC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" run_build $name "${ICPC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
} }
build_computecpp() { build_computecpp() {
run_build computecpp_build "compute++" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ run_build computecpp_build "compute++" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \
-DSYCL_COMPILER=COMPUTECPP \ -DSYCL_COMPILER=COMPUTECPP \
-DSYCL_COMPILER_DIR=${COMPUTECPP_DIR:?} \ -DSYCL_COMPILER_DIR=${COMPUTECPP_DIR:?} \
-DOpenCL_LIBRARY=${OCL_LIB:?}" -DOpenCL_LIBRARY=${OCL_LIB:?}"
} }
build_dpcpp() { build_dpcpp() {
run_build intel_build "${DPCPP_DIR:?}" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ run_build intel_build "${DPCPP_DIR:?}" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \
-DSYCL_COMPILER=DPCPP \ -DSYCL_COMPILER=DPCPP \
-DSYCL_COMPILER_DIR=${DPCPP_DIR:?}" -DSYCL_COMPILER_DIR=${DPCPP_DIR:?}"
# for oneAPI BaseKit: # for oneAPI BaseKit:
# source /opt/intel/oneapi/setvars.sh -force # source /opt/intel/oneapi/setvars.sh -force
# run_build intel_build "dpcpp" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} -DSYCL_COMPILER=ONEAPI-DPCPP" # run_build intel_build "dpcpp" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} -DSYCL_COMPILER=ONEAPI-DPCPP"
} }
build_hipsycl() { build_hipsycl() {
run_build hipsycl_build "syclcc" SYCL " run_build hipsycl_build "syclcc" sycl "
-DSYCL_COMPILER=HIPSYCL \ -DSYCL_COMPILER=HIPSYCL \
-DSYCL_COMPILER_DIR=${HIPSYCL_DIR:?}" -DSYCL_COMPILER_DIR=${HIPSYCL_DIR:?}"
} }

View File

@ -31,8 +31,8 @@ MEM_PAGEFAULT= -DPAGEFAULT
MEM_MODE = $(MEM_$(MEM)) MEM_MODE = $(MEM_$(MEM))
cuda-stream: main.cpp CUDAStream.cu cuda-stream: ../main.cpp CUDAStream.cu
$(CUDA_CXX) -std=c++11 $(CXXFLAGS) -arch=$(NVARCH) $(MEM_MODE) -DCUDA $^ $(EXTRA_FLAGS) -o $@ $(CUDA_CXX) -std=c++11 $(CXXFLAGS) -arch=$(NVARCH) $(MEM_MODE) -DCUDA $^ $(EXTRA_FLAGS) -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean:

View File

@ -2,8 +2,8 @@
HIP_PATH?= /opt/rocm/hip HIP_PATH?= /opt/rocm/hip
HIPCC=$(HIP_PATH)/bin/hipcc HIPCC=$(HIP_PATH)/bin/hipcc
hip-stream: main.cpp HIPStream.cpp hip-stream: ../main.cpp HIPStream.cpp
$(HIPCC) $(CXXFLAGS) -O3 -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@ $(HIPCC) $(CXXFLAGS) -O3 -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean:

View File

@ -71,7 +71,7 @@ CXX = $(NVCC_WRAPPER)
endif endif
endif endif
OBJ = main.o KokkosStream.o OBJ = KokkosStream.o
CXXFLAGS = -O3 CXXFLAGS = -O3
LINKFLAGS = # empty for now LINKFLAGS = # empty for now
@ -85,14 +85,15 @@ endif
endif endif
include $(KOKKOS_PATH)/Makefile.kokkos include $(KOKKOS_PATH)/Makefile.kokkos
HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp)
kokkos-stream: $(OBJ) $(KOKKOS_LINK_DEPENDS) kokkos-stream: ../main.cpp $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(CXX) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -DKOKKOS -o $@ $(CXX) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -DKOKKOS -o $@ -I. -I..
%.o: %.cpp $(KOKKOS_CPP_DEPENDS) %.o: %.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -DKOKKOS -c $< $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
.PHONY: clean .PHONY: clean
clean: clean:
rm -f kokkos-stream main.o KokkosStream.o Kokkos_*.o rm -f kokkos-stream main.o KokkosStream.o Kokkos_*.o KokkosCore_*

View File

@ -30,8 +30,8 @@ else
LIBS = -lOpenCL LIBS = -lOpenCL
endif endif
ocl-stream: main.cpp OCLStream.cpp ocl-stream: ../main.cpp OCLStream.cpp
$(CXX) $(CXXFLAGS) -DOCL $^ $(EXTRA_FLAGS) $(LIBS) -o $@ $(CXX) $(CXXFLAGS) -DOCL $^ $(EXTRA_FLAGS) $(LIBS) -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean:

View File

@ -91,8 +91,8 @@ endif
OMP = $(OMP_$(COMPILER)_$(TARGET)) OMP = $(OMP_$(COMPILER)_$(TARGET))
omp-stream: main.cpp OMPStream.cpp omp-stream: ../main.cpp OMPStream.cpp
$(CXX) $(CXXFLAGS) -DOMP $^ $(OMP) $(EXTRA_FLAGS) -o $@ $(CXX) $(CXXFLAGS) -DOMP $^ $(OMP) $(EXTRA_FLAGS) -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean:

View File

@ -49,8 +49,8 @@ endif
CXXFLAGS = --expt-extended-lambda -O3 -std=c++11 -x cu -Xcompiler -fopenmp -arch $(ARCH) CXXFLAGS = --expt-extended-lambda -O3 -std=c++11 -x cu -Xcompiler -fopenmp -arch $(ARCH)
endif endif
raja-stream: main.cpp RAJAStream.cpp raja-stream: ../main.cpp RAJAStream.cpp
$(CXX) $(CXXFLAGS) -DUSE_RAJA -I$(RAJA_PATH)/include $^ $(EXTRA_FLAGS) -L$(RAJA_PATH)/lib -lRAJA -o $@ $(CXX) $(CXXFLAGS) -DUSE_RAJA -I$(RAJA_PATH)/include $^ $(EXTRA_FLAGS) -L$(RAJA_PATH)/lib -lRAJA -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean:

View File

@ -118,22 +118,24 @@ endfunction()
macro(register_model NAME PREPROCESSOR_NAME) macro(register_model NAME PREPROCESSOR_NAME)
string(TOUPPER ${NAME} MODEL_UPPER)
list(APPEND REGISTERED_MODELS "${NAME}") list(APPEND REGISTERED_MODELS "${NAME}")
list(APPEND IMPL_${MODEL_UPPER}_SOURCES "${ARGN}") string(TOUPPER ${NAME} MODEL_UPPER)
list(APPEND IMPL_${MODEL_UPPER}_SOURCES "${NAME}/${ARGN}")
list(APPEND IMPL_${MODEL_UPPER}_DEFINITIONS "${PREPROCESSOR_NAME}") list(APPEND IMPL_${MODEL_UPPER}_DEFINITIONS "${PREPROCESSOR_NAME}")
endmacro() endmacro()
macro(load_model MODEL) macro(load_model MODEL)
string(TOUPPER "${MODEL}" MODEL_UPPER) if ("${MODEL}" IN_LIST REGISTERED_MODELS)
if ("${MODEL_UPPER}" IN_LIST REGISTERED_MODELS) string(TOLOWER "${MODEL}" MODEL_LOWER)
set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL_UPPER}.cmake) set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL_LOWER}/model.cmake)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/${MODEL_LOWER})
if (NOT EXISTS ${MODEL_FILE}) if (NOT EXISTS ${MODEL_FILE})
message(FATAL_ERROR "${MODEL_FILE} not found, perhaps it needs to be implemented?") message(FATAL_ERROR "${MODEL_FILE} not found, perhaps it needs to be implemented?")
endif () endif ()
include(${MODEL_FILE}) include(${MODEL_FILE})
string(TOUPPER "${MODEL}" MODEL_UPPER)
list(APPEND IMPL_SOURCES ${IMPL_${MODEL_UPPER}_SOURCES}) list(APPEND IMPL_SOURCES ${IMPL_${MODEL_UPPER}_SOURCES})
list(APPEND IMPL_DEFINITIONS ${IMPL_${MODEL_UPPER}_DEFINITIONS}) list(APPEND IMPL_DEFINITIONS ${IMPL_${MODEL_UPPER}_DEFINITIONS})

View File

@ -6,8 +6,8 @@
CXXFLAGS=-O3 -std=c++17 -stdpar -DSTD CXXFLAGS=-O3 -std=c++17 -stdpar -DSTD
STD_CXX=nvc++ STD_CXX=nvc++
std-stream: main.cpp STDStream.cpp std-stream: ../main.cpp STDStream.cpp
$(STD_CXX) $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ $(STD_CXX) $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean:

View File

@ -17,8 +17,8 @@ FLAGS_GNU = -O3 -std=c++2a -march=native
CXXFLAGS = $(FLAGS_$(COMPILER)) CXXFLAGS = $(FLAGS_$(COMPILER))
std20-stream: main.cpp STD20Stream.cpp std20-stream: ../main.cpp STD20Stream.cpp
$(CXX) -DSTD20 $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ $(CXX) -DSTD20 $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean:

View File

@ -73,8 +73,8 @@ SYCL_LINK_FLAGS = $(SYCL_$(COMPILER)_LINK_FLAGS)
SYCL_INCLUDE = $(SYCL_$(COMPILER)_INCLUDE) SYCL_INCLUDE = $(SYCL_$(COMPILER)_INCLUDE)
# only ComputeCpp generates .sycl files which is a bit odd to deal with so we opted to compile everything together # only ComputeCpp generates .sycl files which is a bit odd to deal with so we opted to compile everything together
sycl-stream: main.cpp SYCLStream.cpp sycl-stream: ../main.cpp SYCLStream.cpp
$(SYCL_SYCLCXX) $(SYCL_SYCLFLAGS) $(SYCL_FLAGS) $(SYCL_INCLUDE) -DSYCL $(EXTRA_FLAGS) $(SYCL_LINK_FLAGS) $^ -o $@ $(SYCL_SYCLCXX) $(SYCL_SYCLFLAGS) $(SYCL_FLAGS) $(SYCL_INCLUDE) -DSYCL $(EXTRA_FLAGS) $(SYCL_LINK_FLAGS) $^ -o $@ -I. -I..
.PHONY: clean .PHONY: clean
clean: clean: