Merge branch 'main' into julia
This commit is contained in:
commit
fe180656d1
@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file.
|
|||||||
- CMake build system added for all models.
|
- CMake build system added for all models.
|
||||||
- SYCL device check for FP64 support.
|
- SYCL device check for FP64 support.
|
||||||
- New implementation using TBB.
|
- New implementation using TBB.
|
||||||
|
- Compiler options for Fujitsu added to OpenMP.
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
- Default branch renamed from `master` to `main`.
|
- Default branch renamed from `master` to `main`.
|
||||||
|
|||||||
28
OMP.cmake
28
OMP.cmake
@ -30,6 +30,9 @@
|
|||||||
# ARMClang = ARM Compiler based on Clang (arm.com)
|
# ARMClang = ARM Compiler based on Clang (arm.com)
|
||||||
# These are only added in CMake 3.20:
|
# These are only added in CMake 3.20:
|
||||||
# NVHPC = NVIDIA HPC SDK Compiler (nvidia.com)
|
# NVHPC = NVIDIA HPC SDK Compiler (nvidia.com)
|
||||||
|
# These are only added in CMake 3.21:
|
||||||
|
# Fujitsu = Fujitsu HPC compiler (Trad mode)
|
||||||
|
# FujitsuClang = Fujitsu HPC compiler (Clang mode)
|
||||||
|
|
||||||
|
|
||||||
# CMAKE_SYSTEM_PROCESSOR is set via `uname -p`, we have:
|
# CMAKE_SYSTEM_PROCESSOR is set via `uname -p`, we have:
|
||||||
@ -39,8 +42,9 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
#predefined offload flags based on compiler id
|
# predefined offload flags based on compiler id and vendor,
|
||||||
|
# the format is (COMPILER and VENDOR must be UPPERCASE):
|
||||||
|
# Compiler: OMP_FLAGS_OFFLOAD_<COMPILER?>_<VENDOR?>
|
||||||
|
|
||||||
set(OMP_FLAGS_OFFLOAD_INTEL
|
set(OMP_FLAGS_OFFLOAD_INTEL
|
||||||
-qnextgen -fiopenmp -fopenmp-targets=spir64)
|
-qnextgen -fiopenmp -fopenmp-targets=spir64)
|
||||||
@ -56,15 +60,25 @@ set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG
|
|||||||
-march=) # prefix only, arch appended by the vendor:arch tuple
|
-march=) # prefix only, arch appended by the vendor:arch tuple
|
||||||
|
|
||||||
|
|
||||||
|
# for standard (non-offload) omp, the format is (COMPILER and ARCH must be UPPERCASE):
|
||||||
|
# Compiler: OMP_FLAGS_CPU_<COMPILER?>_<ARCH?>
|
||||||
|
# Linker: OMP_LINK_FLAGS_CPU_<COMPILER?>_<ARCH?>
|
||||||
|
|
||||||
|
set(OMP_FLAGS_CPU_FUJITSU
|
||||||
|
-Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16)
|
||||||
|
set(OMP_LINK_FLAGS_CPU_FUJITSU
|
||||||
|
-Kopenmp)
|
||||||
|
|
||||||
set(OMP_FLAGS_CPU_INTEL
|
set(OMP_FLAGS_CPU_INTEL
|
||||||
-qopt-streaming-stores=always)
|
-qopt-streaming-stores=always)
|
||||||
|
|
||||||
set(OMP_FLAGS_CPU_GNU_PPC64LE
|
set(OMP_FLAGS_CPU_GNU_PPC64LE
|
||||||
-mcpu=native)
|
-mcpu=native)
|
||||||
|
|
||||||
set(OMP_FLAGS_CPU_XL
|
set(OMP_FLAGS_CPU_XL
|
||||||
-O5 -qarch=auto -qtune=auto)
|
-O5 -qarch=auto -qtune=auto)
|
||||||
|
|
||||||
# NEC
|
set(OMP_FLAGS_CPU_NEC -O4 -finline) # CMake doesn't detect this so it's meant to be chosen by register_flag_optional(ARCH)
|
||||||
set(OMP_FLAGS_CPU_NEC -O4 -finline)
|
|
||||||
|
|
||||||
register_flag_optional(CMAKE_CXX_COMPILER
|
register_flag_optional(CMAKE_CXX_COMPILER
|
||||||
"Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)"
|
"Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)"
|
||||||
@ -122,6 +136,12 @@ macro(setup)
|
|||||||
${ARCH}
|
${ARCH}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
register_append_compiler_and_arch_specific_link_flags(
|
||||||
|
OMP_LINK_FLAGS_CPU
|
||||||
|
${COMPILER}
|
||||||
|
${ARCH}
|
||||||
|
)
|
||||||
|
|
||||||
elseif ("${OFFLOAD}" STREQUAL ON)
|
elseif ("${OFFLOAD}" STREQUAL ON)
|
||||||
# offload but with custom flags
|
# offload but with custom flags
|
||||||
register_definitions(OMP_TARGET_GPU)
|
register_definitions(OMP_TARGET_GPU)
|
||||||
|
|||||||
@ -3,7 +3,8 @@ ifndef COMPILER
|
|||||||
define compiler_help
|
define compiler_help
|
||||||
Set COMPILER to change flags (defaulting to GNU).
|
Set COMPILER to change flags (defaulting to GNU).
|
||||||
Available compilers are:
|
Available compilers are:
|
||||||
CLANG CRAY GNU GNU_PPC INTEL XL PGI NEC ARMCLANG AOMP
|
CLANG CRAY GNU GNU_PPC INTEL XL PGI
|
||||||
|
NEC ARMCLANG AOMP FUJITSU
|
||||||
|
|
||||||
Note: GCC on PPC requires -mcpu=native instead of -march=native so we have a special case for it
|
Note: GCC on PPC requires -mcpu=native instead of -march=native so we have a special case for it
|
||||||
|
|
||||||
@ -49,6 +50,7 @@ COMPILER_XL = xlc++
|
|||||||
COMPILER_PGI = pgc++
|
COMPILER_PGI = pgc++
|
||||||
COMPILER_NEC = /opt/nec/ve/bin/nc++
|
COMPILER_NEC = /opt/nec/ve/bin/nc++
|
||||||
COMPILER_AOMP = clang++
|
COMPILER_AOMP = clang++
|
||||||
|
COMPILER_FUJITSU=FCC
|
||||||
CXX = $(COMPILER_$(COMPILER))
|
CXX = $(COMPILER_$(COMPILER))
|
||||||
|
|
||||||
FLAGS_GNU = -O3 -std=c++11 -march=native
|
FLAGS_GNU = -O3 -std=c++11 -march=native
|
||||||
@ -61,6 +63,7 @@ FLAGS_PGI = -O3 -std=c++11
|
|||||||
FLAGS_NEC = -O4 -finline -std=c++11
|
FLAGS_NEC = -O4 -finline -std=c++11
|
||||||
FLAGS_ARMCLANG = -O3 -std=c++11
|
FLAGS_ARMCLANG = -O3 -std=c++11
|
||||||
FLAGS_AOMP = -O3 -std=c++11
|
FLAGS_AOMP = -O3 -std=c++11
|
||||||
|
FLAGS_FUJITSU=-Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16
|
||||||
CXXFLAGS = $(FLAGS_$(COMPILER))
|
CXXFLAGS = $(FLAGS_$(COMPILER))
|
||||||
|
|
||||||
# OpenMP flags for CPUs
|
# OpenMP flags for CPUs
|
||||||
@ -73,6 +76,7 @@ OMP_CLANG_CPU = -fopenmp=libomp
|
|||||||
OMP_XL_CPU = -qsmp=omp -qthreaded
|
OMP_XL_CPU = -qsmp=omp -qthreaded
|
||||||
OMP_PGI_CPU = -mp
|
OMP_PGI_CPU = -mp
|
||||||
OMP_NEC_CPU = -fopenmp
|
OMP_NEC_CPU = -fopenmp
|
||||||
|
OMP_FUJITSU_CPU=-Kopenmp
|
||||||
|
|
||||||
# OpenMP flags for NVIDIA
|
# OpenMP flags for NVIDIA
|
||||||
OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU
|
OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU
|
||||||
|
|||||||
@ -134,20 +134,18 @@ setup_aocc() {
|
|||||||
|
|
||||||
setup_nvhpc() {
|
setup_nvhpc() {
|
||||||
echo "Preparing Nvidia HPC SDK"
|
echo "Preparing Nvidia HPC SDK"
|
||||||
|
|
||||||
local tarball="nvhpc.tar.gz"
|
local tarball="nvhpc.tar.gz"
|
||||||
# local url="http://localhost:8000/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz"
|
# local url="http://localhost:8000/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz"
|
||||||
local url="https://developer.download.nvidia.com/hpc-sdk/21.2/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz"
|
local url="https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz"
|
||||||
|
|
||||||
get_and_untar "$tarball" "$url"
|
get_and_untar "$tarball" "$url"
|
||||||
|
|
||||||
local sdk_dir="$PWD/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2"
|
local sdk_dir="$PWD/nvhpc_2021_215_Linux_x86_64_cuda_11.3/install_components/Linux_x86_64/21.5"
|
||||||
local bin_dir="$sdk_dir/compilers/bin"
|
local bin_dir="$sdk_dir/compilers/bin"
|
||||||
"$bin_dir/makelocalrc" "$bin_dir" -x
|
"$bin_dir/makelocalrc" "$bin_dir" -x
|
||||||
|
|
||||||
export_var NVHPC_NVCXX "$bin_dir/nvc++"
|
export_var NVHPC_NVCXX "$bin_dir/nvc++"
|
||||||
export_var NVHPC_NVCC "$sdk_dir/cuda/11.2/bin/nvcc"
|
export_var NVHPC_NVCC "$sdk_dir/cuda/11.3/bin/nvcc"
|
||||||
export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.2"
|
export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.3"
|
||||||
echo "Installed CUDA versions:"
|
echo "Installed CUDA versions:"
|
||||||
ls "$sdk_dir/cuda"
|
ls "$sdk_dir/cuda"
|
||||||
verify_bin_exists "$NVHPC_NVCXX"
|
verify_bin_exists "$NVHPC_NVCXX"
|
||||||
|
|||||||
@ -124,7 +124,7 @@ run_build() {
|
|||||||
|
|
||||||
AMD_ARCH="gfx_903"
|
AMD_ARCH="gfx_903"
|
||||||
NV_ARCH="sm_70"
|
NV_ARCH="sm_70"
|
||||||
NV_ARCH_CCXY="cuda11.2,cc80"
|
NV_ARCH_CCXY="cuda11.3,cc80"
|
||||||
|
|
||||||
build_gcc() {
|
build_gcc() {
|
||||||
local name="gcc_build"
|
local name="gcc_build"
|
||||||
|
|||||||
@ -20,7 +20,9 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
macro(wipe_gcc_style_optimisation_flags VAR)
|
macro(wipe_gcc_style_optimisation_flags VAR)
|
||||||
string(REGEX REPLACE "([\\/\\-]O.)" "" ${VAR} ${${VAR}})
|
if(${VAR})
|
||||||
|
string(REGEX REPLACE "([\\/\\-]O.)" "" ${VAR} ${${VAR}})
|
||||||
|
endif()
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
macro(register_link_library)
|
macro(register_link_library)
|
||||||
@ -41,7 +43,7 @@ macro(register_append_link_flags)
|
|||||||
list(APPEND LINK_FLAGS ${ARGN})
|
list(APPEND LINK_FLAGS ${ARGN})
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
|
function(bind_cxx_and_arch OUT PREFIX CXX ARCH)
|
||||||
string(TOUPPER ${CXX} _CXX)
|
string(TOUPPER ${CXX} _CXX)
|
||||||
string(TOUPPER ${ARCH} _ARCH)
|
string(TOUPPER ${ARCH} _ARCH)
|
||||||
set(_CXX_ARCH_SPECIFIC_FLAGS "${${PREFIX}_${_CXX}_${_ARCH}}")
|
set(_CXX_ARCH_SPECIFIC_FLAGS "${${PREFIX}_${_CXX}_${_ARCH}}")
|
||||||
@ -52,6 +54,17 @@ macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
|
|||||||
if (_CXX_ARCH_SPECIFIC_FLAGS)
|
if (_CXX_ARCH_SPECIFIC_FLAGS)
|
||||||
register_append_cxx_flags(ANY ${_CXX_ARCH_SPECIFIC_FLAGS})
|
register_append_cxx_flags(ANY ${_CXX_ARCH_SPECIFIC_FLAGS})
|
||||||
endif ()
|
endif ()
|
||||||
|
set(${OUT} "${_CXX_ARCH_SPECIFIC_FLAGS}" PARENT_SCOPE)
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
|
||||||
|
bind_cxx_and_arch(OUT ${PREFIX} ${CXX} ${ARCH})
|
||||||
|
register_append_cxx_flags(ANY ${OUT})
|
||||||
|
endmacro()
|
||||||
|
|
||||||
|
macro(register_append_compiler_and_arch_specific_link_flags PREFIX CXX ARCH)
|
||||||
|
bind_cxx_and_arch(OUT ${PREFIX} ${CXX} ${ARCH})
|
||||||
|
register_append_link_flags(${OUT})
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
macro(register_definitions)
|
macro(register_definitions)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user