diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d209e2..976964a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file. - CMake build system added for all models. - SYCL device check for FP64 support. - New implementation using TBB. +- Compiler options for Fujitsu added to OpenMP. ### Changed - Default branch renamed from `master` to `main`. diff --git a/OMP.cmake b/OMP.cmake index c8dde9f..1955ebc 100644 --- a/OMP.cmake +++ b/OMP.cmake @@ -30,6 +30,9 @@ # ARMClang = ARM Compiler based on Clang (arm.com) # These are only added in CMake 3.20: # NVHPC = NVIDIA HPC SDK Compiler (nvidia.com) +# These are only added in CMake 3.21: +# Fujitsu = Fujitsu HPC compiler (Trad mode) +# FujitsuClang = Fujitsu HPC compiler (Clang mode) # CMAKE_SYSTEM_PROCESSOR is set via `uname -p`, we have: @@ -39,8 +42,9 @@ # -#predefined offload flags based on compiler id - +# predefined offload flags based on compiler id and vendor, +# the format is (COMPILER and VENDOR must be UPPERCASE): +# Compiler: OMP_FLAGS_OFFLOAD_<COMPILER>_<VENDOR> set(OMP_FLAGS_OFFLOAD_INTEL -qnextgen -fiopenmp -fopenmp-targets=spir64) @@ -56,15 +60,25 @@ set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG -march=) # prefix only, arch appended by the vendor:arch tuple +# for standard (non-offload) omp, the format is (COMPILER and ARCH must be UPPERCASE): +# Compiler: OMP_FLAGS_CPU_<COMPILER>_<ARCH> +# Linker: OMP_LINK_FLAGS_CPU_<COMPILER>_<ARCH> + +set(OMP_FLAGS_CPU_FUJITSU + -Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16) +set(OMP_LINK_FLAGS_CPU_FUJITSU + -Kopenmp) + set(OMP_FLAGS_CPU_INTEL -qopt-streaming-stores=always) + set(OMP_FLAGS_CPU_GNU_PPC64LE -mcpu=native) + set(OMP_FLAGS_CPU_XL -O5 -qarch=auto -qtune=auto) -# NEC -set(OMP_FLAGS_CPU_NEC -O4 -finline) +set(OMP_FLAGS_CPU_NEC -O4 -finline) # CMake doesn't detect this so it's meant to be chosen by register_flag_optional(ARCH) register_flag_optional(CMAKE_CXX_COMPILER "Any 
CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)" @@ -122,6 +136,12 @@ macro(setup) ${ARCH} ) + register_append_compiler_and_arch_specific_link_flags( + OMP_LINK_FLAGS_CPU + ${COMPILER} + ${ARCH} + ) + elseif ("${OFFLOAD}" STREQUAL ON) # offload but with custom flags register_definitions(OMP_TARGET_GPU) diff --git a/OpenMP.make b/OpenMP.make index dde3f75..e81d252 100644 --- a/OpenMP.make +++ b/OpenMP.make @@ -3,7 +3,8 @@ ifndef COMPILER define compiler_help Set COMPILER to change flags (defaulting to GNU). Available compilers are: - CLANG CRAY GNU GNU_PPC INTEL XL PGI NEC ARMCLANG AOMP + CLANG CRAY GNU GNU_PPC INTEL XL PGI + NEC ARMCLANG AOMP FUJITSU Note: GCC on PPC requires -mcpu=native instead of -march=native so we have a special case for it @@ -49,6 +50,7 @@ COMPILER_XL = xlc++ COMPILER_PGI = pgc++ COMPILER_NEC = /opt/nec/ve/bin/nc++ COMPILER_AOMP = clang++ +COMPILER_FUJITSU=FCC CXX = $(COMPILER_$(COMPILER)) FLAGS_GNU = -O3 -std=c++11 -march=native @@ -61,6 +63,7 @@ FLAGS_PGI = -O3 -std=c++11 FLAGS_NEC = -O4 -finline -std=c++11 FLAGS_ARMCLANG = -O3 -std=c++11 FLAGS_AOMP = -O3 -std=c++11 +FLAGS_FUJITSU=-Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16 CXXFLAGS = $(FLAGS_$(COMPILER)) # OpenMP flags for CPUs @@ -73,6 +76,7 @@ OMP_CLANG_CPU = -fopenmp=libomp OMP_XL_CPU = -qsmp=omp -qthreaded OMP_PGI_CPU = -mp OMP_NEC_CPU = -fopenmp +OMP_FUJITSU_CPU=-Kopenmp # OpenMP flags for NVIDIA OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU diff --git a/ci-prepare-bionic.sh b/ci-prepare-bionic.sh index fb69c05..fa3b2d2 100755 --- a/ci-prepare-bionic.sh +++ b/ci-prepare-bionic.sh @@ -134,20 +134,18 @@ setup_aocc() { setup_nvhpc() { echo "Preparing Nvidia HPC SDK" - local tarball="nvhpc.tar.gz" -# local url="http://localhost:8000/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz" - local 
url="https://developer.download.nvidia.com/hpc-sdk/21.2/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz" - +# local url="http://localhost:8000/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz" + local url="https://developer.download.nvidia.com/hpc-sdk/21.5/nvhpc_2021_215_Linux_x86_64_cuda_11.3.tar.gz" get_and_untar "$tarball" "$url" - local sdk_dir="$PWD/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2" + local sdk_dir="$PWD/nvhpc_2021_215_Linux_x86_64_cuda_11.3/install_components/Linux_x86_64/21.5" local bin_dir="$sdk_dir/compilers/bin" "$bin_dir/makelocalrc" "$bin_dir" -x export_var NVHPC_NVCXX "$bin_dir/nvc++" - export_var NVHPC_NVCC "$sdk_dir/cuda/11.2/bin/nvcc" - export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.2" + export_var NVHPC_NVCC "$sdk_dir/cuda/11.3/bin/nvcc" + export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.3" echo "Installed CUDA versions:" ls "$sdk_dir/cuda" verify_bin_exists "$NVHPC_NVCXX" diff --git a/ci-test-compile.sh b/ci-test-compile.sh index 456f836..00ca718 100755 --- a/ci-test-compile.sh +++ b/ci-test-compile.sh @@ -124,7 +124,7 @@ run_build() { AMD_ARCH="gfx_903" NV_ARCH="sm_70" -NV_ARCH_CCXY="cuda11.2,cc80" +NV_ARCH_CCXY="cuda11.3,cc80" build_gcc() { local name="gcc_build" diff --git a/register_models.cmake b/register_models.cmake index 82e7243..9f4cb57 100644 --- a/register_models.cmake +++ b/register_models.cmake @@ -20,7 +20,9 @@ # macro(wipe_gcc_style_optimisation_flags VAR) - string(REGEX REPLACE "([\\/\\-]O.)" "" ${VAR} ${${VAR}}) + if(${VAR}) + string(REGEX REPLACE "([\\/\\-]O.)" "" ${VAR} ${${VAR}}) + endif() endmacro() macro(register_link_library) @@ -41,7 +43,7 @@ macro(register_append_link_flags) list(APPEND LINK_FLAGS ${ARGN}) endmacro() -macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH) +function(bind_cxx_and_arch OUT PREFIX CXX ARCH) string(TOUPPER ${CXX} _CXX) string(TOUPPER ${ARCH} _ARCH) set(_CXX_ARCH_SPECIFIC_FLAGS "${${PREFIX}_${_CXX}_${_ARCH}}") @@ -52,6 +54,17 @@ 
macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH) if (_CXX_ARCH_SPECIFIC_FLAGS) register_append_cxx_flags(ANY ${_CXX_ARCH_SPECIFIC_FLAGS}) endif () + set(${OUT} "${_CXX_ARCH_SPECIFIC_FLAGS}" PARENT_SCOPE) +endfunction() + +macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH) + bind_cxx_and_arch(OUT ${PREFIX} ${CXX} ${ARCH}) + register_append_cxx_flags(ANY ${OUT}) +endmacro() + +macro(register_append_compiler_and_arch_specific_link_flags PREFIX CXX ARCH) + bind_cxx_and_arch(OUT ${PREFIX} ${CXX} ${ARCH}) + register_append_link_flags(${OUT}) endmacro() macro(register_definitions)