Merge branch 'main' into julia

This commit is contained in:
Tom Lin 2021-06-30 18:44:17 +01:00
commit fe180656d1
6 changed files with 51 additions and 15 deletions

View File

@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file.
- CMake build system added for all models.
- SYCL device check for FP64 support.
- New implementation using TBB.
- Compiler options for Fujitsu added to OpenMP.
### Changed
- Default branch renamed from `master` to `main`.

View File

@ -30,6 +30,9 @@
# ARMClang = ARM Compiler based on Clang (arm.com)
# These are only added in CMake 3.20:
# NVHPC = NVIDIA HPC SDK Compiler (nvidia.com)
# These are only added in CMake 3.21:
# Fujitsu = Fujitsu HPC compiler (Trad mode)
# FujitsuClang = Fujitsu HPC compiler (Clang mode)
# CMAKE_SYSTEM_PROCESSOR is set via `uname -p`, we have:
@ -39,8 +42,9 @@
#
#predefined offload flags based on compiler id
# predefined offload flags based on compiler id and vendor,
# the format is (COMPILER and VENDOR must be UPPERCASE):
# Compiler: OMP_FLAGS_OFFLOAD_<COMPILER?>_<VENDOR?>
set(OMP_FLAGS_OFFLOAD_INTEL
-qnextgen -fiopenmp -fopenmp-targets=spir64)
@ -56,15 +60,25 @@ set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG
-march=) # prefix only, arch appended by the vendor:arch tuple
# for standard (non-offload) omp, the format is (COMPILER and ARCH must be UPPERCASE):
# Compiler: OMP_FLAGS_CPU_<COMPILER?>_<ARCH?>
# Linker: OMP_LINK_FLAGS_CPU_<COMPILER?>_<ARCH?>
set(OMP_FLAGS_CPU_FUJITSU
-Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16)
set(OMP_LINK_FLAGS_CPU_FUJITSU
-Kopenmp)
set(OMP_FLAGS_CPU_INTEL
-qopt-streaming-stores=always)
set(OMP_FLAGS_CPU_GNU_PPC64LE
-mcpu=native)
set(OMP_FLAGS_CPU_XL
-O5 -qarch=auto -qtune=auto)
# NEC
set(OMP_FLAGS_CPU_NEC -O4 -finline)
set(OMP_FLAGS_CPU_NEC -O4 -finline) # CMake doesn't detect this so it's meant to be chosen by register_flag_optional(ARCH)
register_flag_optional(CMAKE_CXX_COMPILER
"Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)"
@ -122,6 +136,12 @@ macro(setup)
${ARCH}
)
register_append_compiler_and_arch_specific_link_flags(
OMP_LINK_FLAGS_CPU
${COMPILER}
${ARCH}
)
elseif ("${OFFLOAD}" STREQUAL ON)
# offload but with custom flags
register_definitions(OMP_TARGET_GPU)

View File

@ -3,7 +3,8 @@ ifndef COMPILER
define compiler_help
Set COMPILER to change flags (defaulting to GNU).
Available compilers are:
CLANG CRAY GNU GNU_PPC INTEL XL PGI NEC ARMCLANG AOMP
CLANG CRAY GNU GNU_PPC INTEL XL PGI
NEC ARMCLANG AOMP FUJITSU
Note: GCC on PPC requires -mcpu=native instead of -march=native so we have a special case for it
@ -49,6 +50,7 @@ COMPILER_XL = xlc++
COMPILER_PGI = pgc++
COMPILER_NEC = /opt/nec/ve/bin/nc++
COMPILER_AOMP = clang++
COMPILER_FUJITSU=FCC
CXX = $(COMPILER_$(COMPILER))
FLAGS_GNU = -O3 -std=c++11 -march=native
@ -61,6 +63,7 @@ FLAGS_PGI = -O3 -std=c++11
FLAGS_NEC = -O4 -finline -std=c++11
FLAGS_ARMCLANG = -O3 -std=c++11
FLAGS_AOMP = -O3 -std=c++11
FLAGS_FUJITSU=-Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16
CXXFLAGS = $(FLAGS_$(COMPILER))
# OpenMP flags for CPUs
@ -73,6 +76,7 @@ OMP_CLANG_CPU = -fopenmp=libomp
OMP_XL_CPU = -qsmp=omp -qthreaded
OMP_PGI_CPU = -mp
OMP_NEC_CPU = -fopenmp
OMP_FUJITSU_CPU=-Kopenmp
# OpenMP flags for NVIDIA
OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU

View File

@ -134,20 +134,18 @@ setup_aocc() {
setup_nvhpc() {
echo "Preparing Nvidia HPC SDK"
local tarball="nvhpc.tar.gz"
# local url="http://localhost:8000/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz"
local url="https://developer.download.nvidia.com/hpc-sdk/21.2/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz"
# local url="http://localhost:8000/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz"
local url="https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz"
get_and_untar "$tarball" "$url"
local sdk_dir="$PWD/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2"
local sdk_dir="$PWD/nvhpc_2021_215_Linux_x86_64_cuda_11.3/install_components/Linux_x86_64/21.5"
local bin_dir="$sdk_dir/compilers/bin"
"$bin_dir/makelocalrc" "$bin_dir" -x
export_var NVHPC_NVCXX "$bin_dir/nvc++"
export_var NVHPC_NVCC "$sdk_dir/cuda/11.2/bin/nvcc"
export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.2"
export_var NVHPC_NVCC "$sdk_dir/cuda/11.3/bin/nvcc"
export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.3"
echo "Installed CUDA versions:"
ls "$sdk_dir/cuda"
verify_bin_exists "$NVHPC_NVCXX"

View File

@ -124,7 +124,7 @@ run_build() {
AMD_ARCH="gfx_903"
NV_ARCH="sm_70"
NV_ARCH_CCXY="cuda11.2,cc80"
NV_ARCH_CCXY="cuda11.3,cc80"
build_gcc() {
local name="gcc_build"

View File

@ -20,7 +20,9 @@
#
# Removes optimisation-level tokens of the form -O<x> or /O<x> (e.g. -O3, /O2)
# from the variable whose name is passed in VAR; the variable is rewritten in place.
#   VAR - name (not value) of the variable that holds the flags
# The variable is left untouched when it is unset, empty or evaluates to false:
# string(REGEX REPLACE) needs at least one input argument and aborts the
# configure step when an empty value is expanded unquoted.
macro(wipe_gcc_style_optimisation_flags VAR)
    if(${VAR})
        # Quoted so the whole value reaches string() as exactly one input,
        # even if it contains `;` separators.
        string(REGEX REPLACE "([\\/\\-]O.)" "" ${VAR} "${${VAR}}")
    endif()
endmacro()
macro(register_link_library)
@ -41,7 +43,7 @@ macro(register_append_link_flags)
list(APPEND LINK_FLAGS ${ARGN})
endmacro()
macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
function(bind_cxx_and_arch OUT PREFIX CXX ARCH)
string(TOUPPER ${CXX} _CXX)
string(TOUPPER ${ARCH} _ARCH)
set(_CXX_ARCH_SPECIFIC_FLAGS "${${PREFIX}_${_CXX}_${_ARCH}}")
@ -52,6 +54,17 @@ macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
if (_CXX_ARCH_SPECIFIC_FLAGS)
register_append_cxx_flags(ANY ${_CXX_ARCH_SPECIFIC_FLAGS})
endif ()
set(${OUT} "${_CXX_ARCH_SPECIFIC_FLAGS}" PARENT_SCOPE)
endfunction()
# Looks up the flags registered for a compiler/architecture pair and appends
# them to the C++ compile flags.
#   PREFIX - prefix of the flag variables, forwarded to bind_cxx_and_arch
#   CXX    - compiler identifier, forwarded to bind_cxx_and_arch
#   ARCH   - architecture identifier, forwarded to bind_cxx_and_arch
# NOTE: this is a macro, so the intermediate variable OUT ends up in the
# caller's scope.
macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
# bind_cxx_and_arch writes the resolved flags into OUT
bind_cxx_and_arch(OUT ${PREFIX} ${CXX} ${ARCH})
# NOTE(review): when no flags were resolved, only ANY is passed on - confirm
# register_append_cxx_flags accepts that.
register_append_cxx_flags(ANY ${OUT})
endmacro()
# Looks up the flags registered for a compiler/architecture pair and appends
# them to the link flags.
#   PREFIX - prefix of the flag variables, forwarded to bind_cxx_and_arch
#   CXX    - compiler identifier, forwarded to bind_cxx_and_arch
#   ARCH   - architecture identifier, forwarded to bind_cxx_and_arch
# NOTE: this is a macro, so the intermediate variable OUT ends up in the
# caller's scope.
macro(register_append_compiler_and_arch_specific_link_flags PREFIX CXX ARCH)
# bind_cxx_and_arch writes the resolved flags into OUT
bind_cxx_and_arch(OUT ${PREFIX} ${CXX} ${ARCH})
# An empty OUT expands to no arguments, so nothing is appended in that case
register_append_link_flags(${OUT})
endmacro()
macro(register_definitions)