Merge branch 'main' into julia
This commit is contained in:
commit
fe180656d1
@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file.
|
||||
- CMake build system added for all models.
|
||||
- SYCL device check for FP64 support.
|
||||
- New implementation using TBB.
|
||||
- Compiler options for Fujitsu added to OpenMP.
|
||||
|
||||
### Changed
|
||||
- Default branch renamed from `master` to `main`.
|
||||
|
||||
28
OMP.cmake
28
OMP.cmake
@ -30,6 +30,9 @@
|
||||
# ARMClang = ARM Compiler based on Clang (arm.com)
|
||||
# These are only added in CMake 3.20:
|
||||
# NVHPC = NVIDIA HPC SDK Compiler (nvidia.com)
|
||||
# These are only added in CMake 3.21
|
||||
# Fujitsu = Fujitsu HPC compiler (Trad mode)
|
||||
# FujitsuClang = Fujitsu HPC compiler (Clang mode)
|
||||
|
||||
|
||||
# CMAKE_SYSTEM_PROCESSOR is set via `uname -p`, we have:
|
||||
@ -39,8 +42,9 @@
|
||||
#
|
||||
|
||||
|
||||
#predefined offload flags based on compiler id
|
||||
|
||||
# predefined offload flags based on compiler id and vendor,
|
||||
# the format is (COMPILER and VENDOR must be UPPERCASE):
|
||||
# Compiler: OMP_FLAGS_OFFLOAD_<COMPILER?>_<VENDOR?>
|
||||
|
||||
set(OMP_FLAGS_OFFLOAD_INTEL
|
||||
-qnextgen -fiopenmp -fopenmp-targets=spir64)
|
||||
@ -56,15 +60,25 @@ set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG
|
||||
-march=) # prefix only, arch appended by the vendor:arch tuple
|
||||
|
||||
|
||||
# for standard (non-offload) omp, the format is (COMPILER and ARCH must be UPPERCASE):
|
||||
# Compiler: OMP_FLAGS_CPU_<COMPILER?>_<ARCH?>
|
||||
# Linker: OMP_LINK_FLAGS_CPU_<COMPILER?>_<ARCH?>
|
||||
|
||||
set(OMP_FLAGS_CPU_FUJITSU
|
||||
-Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16)
|
||||
set(OMP_LINK_FLAGS_CPU_FUJITSU
|
||||
-Kopenmp)
|
||||
|
||||
set(OMP_FLAGS_CPU_INTEL
|
||||
-qopt-streaming-stores=always)
|
||||
|
||||
set(OMP_FLAGS_CPU_GNU_PPC64LE
|
||||
-mcpu=native)
|
||||
|
||||
set(OMP_FLAGS_CPU_XL
|
||||
-O5 -qarch=auto -qtune=auto)
|
||||
|
||||
# NEC
|
||||
set(OMP_FLAGS_CPU_NEC -O4 -finline)
|
||||
set(OMP_FLAGS_CPU_NEC -O4 -finline) # CMake doesn't detect this so it's meant to be chosen by register_flag_optional(ARCH)
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)"
|
||||
@ -122,6 +136,12 @@ macro(setup)
|
||||
${ARCH}
|
||||
)
|
||||
|
||||
register_append_compiler_and_arch_specific_link_flags(
|
||||
OMP_LINK_FLAGS_CPU
|
||||
${COMPILER}
|
||||
${ARCH}
|
||||
)
|
||||
|
||||
elseif ("${OFFLOAD}" STREQUAL ON)
|
||||
# offload but with custom flags
|
||||
register_definitions(OMP_TARGET_GPU)
|
||||
|
||||
@ -3,7 +3,8 @@ ifndef COMPILER
|
||||
define compiler_help
|
||||
Set COMPILER to change flags (defaulting to GNU).
|
||||
Available compilers are:
|
||||
CLANG CRAY GNU GNU_PPC INTEL XL PGI NEC ARMCLANG AOMP
|
||||
CLANG CRAY GNU GNU_PPC INTEL XL PGI
|
||||
NEC ARMCLANG AOMP FUJITSU
|
||||
|
||||
Note: GCC on PPC requires -mcpu=native instead of -march=native so we have a special case for it
|
||||
|
||||
@ -49,6 +50,7 @@ COMPILER_XL = xlc++
|
||||
COMPILER_PGI = pgc++
|
||||
COMPILER_NEC = /opt/nec/ve/bin/nc++
|
||||
COMPILER_AOMP = clang++
|
||||
COMPILER_FUJITSU=FCC
|
||||
CXX = $(COMPILER_$(COMPILER))
|
||||
|
||||
FLAGS_GNU = -O3 -std=c++11 -march=native
|
||||
@ -61,6 +63,7 @@ FLAGS_PGI = -O3 -std=c++11
|
||||
FLAGS_NEC = -O4 -finline -std=c++11
|
||||
FLAGS_ARMCLANG = -O3 -std=c++11
|
||||
FLAGS_AOMP = -O3 -std=c++11
|
||||
FLAGS_FUJITSU=-Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16
|
||||
CXXFLAGS = $(FLAGS_$(COMPILER))
|
||||
|
||||
# OpenMP flags for CPUs
|
||||
@ -73,6 +76,7 @@ OMP_CLANG_CPU = -fopenmp=libomp
|
||||
OMP_XL_CPU = -qsmp=omp -qthreaded
|
||||
OMP_PGI_CPU = -mp
|
||||
OMP_NEC_CPU = -fopenmp
|
||||
OMP_FUJITSU_CPU=-Kopenmp
|
||||
|
||||
# OpenMP flags for NVIDIA
|
||||
OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU
|
||||
|
||||
@ -134,20 +134,18 @@ setup_aocc() {
|
||||
|
||||
setup_nvhpc() {
|
||||
echo "Preparing Nvidia HPC SDK"
|
||||
|
||||
local tarball="nvhpc.tar.gz"
|
||||
# local url="http://localhost:8000/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz"
|
||||
local url="https://developer.download.nvidia.com/hpc-sdk/21.2/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz"
|
||||
|
||||
# local url="http://localhost:8000/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz"
|
||||
local url="https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz"
|
||||
get_and_untar "$tarball" "$url"
|
||||
|
||||
local sdk_dir="$PWD/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2"
|
||||
local sdk_dir="$PWD/nvhpc_2021_215_Linux_x86_64_cuda_11.3/install_components/Linux_x86_64/21.5"
|
||||
local bin_dir="$sdk_dir/compilers/bin"
|
||||
"$bin_dir/makelocalrc" "$bin_dir" -x
|
||||
|
||||
export_var NVHPC_NVCXX "$bin_dir/nvc++"
|
||||
export_var NVHPC_NVCC "$sdk_dir/cuda/11.2/bin/nvcc"
|
||||
export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.2"
|
||||
export_var NVHPC_NVCC "$sdk_dir/cuda/11.3/bin/nvcc"
|
||||
export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.3"
|
||||
echo "Installed CUDA versions:"
|
||||
ls "$sdk_dir/cuda"
|
||||
verify_bin_exists "$NVHPC_NVCXX"
|
||||
|
||||
@ -124,7 +124,7 @@ run_build() {
|
||||
|
||||
AMD_ARCH="gfx_903"
|
||||
NV_ARCH="sm_70"
|
||||
NV_ARCH_CCXY="cuda11.2,cc80"
|
||||
NV_ARCH_CCXY="cuda11.3,cc80"
|
||||
|
||||
build_gcc() {
|
||||
local name="gcc_build"
|
||||
|
||||
@ -20,7 +20,9 @@
|
||||
#
|
||||
|
||||
macro(wipe_gcc_style_optimisation_flags VAR)
|
||||
string(REGEX REPLACE "([\\/\\-]O.)" "" ${VAR} ${${VAR}})
|
||||
if(${VAR})
|
||||
string(REGEX REPLACE "([\\/\\-]O.)" "" ${VAR} ${${VAR}})
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
macro(register_link_library)
|
||||
@ -41,7 +43,7 @@ macro(register_append_link_flags)
|
||||
list(APPEND LINK_FLAGS ${ARGN})
|
||||
endmacro()
|
||||
|
||||
macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
|
||||
function(bind_cxx_and_arch OUT PREFIX CXX ARCH)
|
||||
string(TOUPPER ${CXX} _CXX)
|
||||
string(TOUPPER ${ARCH} _ARCH)
|
||||
set(_CXX_ARCH_SPECIFIC_FLAGS "${${PREFIX}_${_CXX}_${_ARCH}}")
|
||||
@ -52,6 +54,17 @@ macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
|
||||
if (_CXX_ARCH_SPECIFIC_FLAGS)
|
||||
register_append_cxx_flags(ANY ${_CXX_ARCH_SPECIFIC_FLAGS})
|
||||
endif ()
|
||||
set(${OUT} "${_CXX_ARCH_SPECIFIC_FLAGS}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
|
||||
bind_cxx_and_arch(OUT ${PREFIX} ${CXX} ${ARCH})
|
||||
register_append_cxx_flags(ANY ${OUT})
|
||||
endmacro()
|
||||
|
||||
macro(register_append_compiler_and_arch_specific_link_flags PREFIX CXX ARCH)
|
||||
bind_cxx_and_arch(OUT ${PREFIX} ${CXX} ${ARCH})
|
||||
register_append_link_flags(${OUT})
|
||||
endmacro()
|
||||
|
||||
macro(register_definitions)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user