BabelStream/OMP.cmake
Tom Lin 14aefecc57 Re-add all compile and arch dependent flags
Fix ACC not linking on CMake < 3.16
Fix CUDA warnings for CMP0104 and avoid repeated -O[n] flags
Fix ComputeCpp not picking up custom flags
[CI] Highlight compiler warnings
[CI] Don't skip remaining tests when one fails
[CI] Add CMake 3.13, 3.15, 3.18 checks
2021-03-11 15:46:23 +00:00

177 lines
6.2 KiB
CMake

# Compiler ID for reference (as of CMake 3.13)
# Absoft = Absoft Fortran (absoft.com)
# ADSP = Analog VisualDSP++ (analog.com)
# AppleClang = Apple Clang (apple.com)
# ARMCC = ARM Compiler (arm.com)
# Bruce = Bruce C Compiler
# CCur = Concurrent Fortran (ccur.com)
# Clang = LLVM Clang (clang.llvm.org)
# Cray = Cray Compiler (cray.com)
# Embarcadero, Borland = Embarcadero (embarcadero.com)
# G95 = G95 Fortran (g95.org)
# GNU = GNU Compiler Collection (gcc.gnu.org)
# HP = Hewlett-Packard Compiler (hp.com)
# IAR = IAR Systems (iar.com)
# Intel = Intel Compiler (intel.com)
# MIPSpro = SGI MIPSpro (sgi.com)
# MSVC = Microsoft Visual Studio (microsoft.com)
# NVIDIA = NVIDIA CUDA Compiler (nvidia.com)
# OpenWatcom = Open Watcom (openwatcom.org)
# PGI = The Portland Group (pgroup.com)
# Flang = Flang Fortran Compiler
# PathScale = PathScale (pathscale.com)
# SDCC = Small Device C Compiler (sdcc.sourceforge.net)
# SunPro = Oracle Solaris Studio (oracle.com)
# TI = Texas Instruments (ti.com)
# TinyCC = Tiny C Compiler (tinycc.org)
# XL, VisualAge, zOS = IBM XL (ibm.com)
# These are only added in CMake 3.15:
# ARMClang = ARM Compiler based on Clang (arm.com)
# These are only added in CMake 3.20:
# NVHPC = NVIDIA HPC SDK Compiler (nvidia.com)
# CMAKE_SYSTEM_PROCESSOR is set via `uname -p`, we have:
# Power9 = ppc64le
# x64 = x86_64
# arm64 = aarch64
#
# Predefined flag tables, looked up by name from setup().
#
# Offload tables are keyed as OMP_FLAGS_OFFLOAD_<VENDOR> (vendor-only OFFLOAD value)
# or OMP_FLAGS_OFFLOAD_<COMPILER>_<VENDOR> (OFFLOAD=<vendor>:<arch>), where <COMPILER>
# is the upper-cased CMAKE_CXX_COMPILER_ID.

# Used for OFFLOAD=INTEL
set(OMP_FLAGS_OFFLOAD_INTEL
        -qnextgen
        -fiopenmp
        -fopenmp-targets=spir64)

# GNU compiler, per offload vendor
set(OMP_FLAGS_OFFLOAD_GNU_NVIDIA
        -foffload=nvptx-none)
set(OMP_FLAGS_OFFLOAD_GNU_AMD
        -foffload=amdgcn-amdhsa)

# Clang compiler, per offload vendor
set(OMP_FLAGS_OFFLOAD_CLANG_NVIDIA
        -fopenmp=libomp
        -fopenmp-targets=nvptx64-nvidia-cuda
        -Xopenmp-target=nvptx64-nvidia-cuda)
set(OMP_FLAGS_OFFLOAD_CLANG_AMD
        -fopenmp=libomp
        -fopenmp-targets=amdgcn-amd-amdhsa
        -Xopenmp-target=amdgcn-amd-amdhsa)

# Prefix only: setup() concatenates the <arch> part of OFFLOAD=<vendor>:<arch> onto it
set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG
        -march=)

# CPU-only (no offload) tables, keyed as OMP_FLAGS_CPU_<COMPILER>[_<ARCH>]
set(OMP_FLAGS_CPU_INTEL
        -qopt-streaming-stores=always)
set(OMP_FLAGS_CPU_GNU_PPC64LE
        -mcpu=native)
set(OMP_FLAGS_CPU_XL
        -O5
        -qarch=auto
        -qtune=auto)
# NEC
set(OMP_FLAGS_CPU_NEC
        -O4
        -finline)
# User-facing options of the OpenMP model.
# NOTE(review): register_flag_optional is defined outside this file; the three arguments
# appear to be (name, description, default value) - confirm against its definition.

# C++ compiler to build with.
register_flag_optional(CMAKE_CXX_COMPILER
        "Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)"
        "c++")

# Manual architecture override; when left empty, setup() falls back to CMAKE_SYSTEM_PROCESSOR.
register_flag_optional(ARCH
        "This overrides CMake's CMAKE_SYSTEM_PROCESSOR detection which uses (uname -p), this is mainly for use with
specialised accelerators only and not to be confused with offload which is mutually exclusive with this.
Supported values are:
- NEC"
        "")

# Offload selector: OFF, ON (flags come from OFFLOAD_FLAGS), <VENDOR>, or <VENDOR>:<ARCH>.
register_flag_optional(OFFLOAD
        "Whether to use OpenMP offload, the format is <VENDOR:ARCH?>|ON|OFF.
We support a small set of known offload flags for clang, gcc, and icpx.
However, as offload support is rapidly evolving, we recommend you directly supply them via OFFLOAD_FLAGS.
For example:
* OFFLOAD=NVIDIA:sm_60
* OFFLOAD=AMD:gfx906
* OFFLOAD=INTEL
* OFFLOAD=ON OFFLOAD_FLAGS=..."
        OFF)

# Explicit offload flags; when non-empty, setup() uses these instead of the predefined tables.
register_flag_optional(OFFLOAD_FLAGS
        "If OFFLOAD is enabled, this *overrides* the default offload flags"
        "")

# Controls whether setup() also registers the resolved flags as link flags.
register_flag_optional(OFFLOAD_APPEND_LINK_FLAG
        "If enabled, this appends all resolved offload flags (OFFLOAD=<vendor:arch> or directly from OFFLOAD_FLAGS) to the link flags.
This is required for most offload implementations so that offload libraries can be linked correctly."
        ON)
# setup()
#
# Configures the OpenMP model: requires C++17 and OpenMP, links OpenMP::OpenMP_CXX, then resolves
# the extra compiler flags into OMP_FLAGS and registers them.
#
# Inputs read:
#   CMAKE_CXX_COMPILER_ID    - upper-cased into COMPILER, used as part of the flag table names
#   ARCH                     - optional override; defaults to upper-cased CMAKE_SYSTEM_PROCESSOR
#   OFFLOAD                  - OFF/undefined, ON, <VENDOR> or <VENDOR>:<ARCH>
#   OFFLOAD_FLAGS            - explicit flags; used when OFFLOAD=ON or whenever non-empty
#   OFFLOAD_APPEND_LINK_FLAG - when true, OMP_FLAGS are registered as link flags as well
#
# Flag resolution, first match wins:
#   1. OFFLOAD off/undefined   -> CPU flags from the OMP_FLAGS_CPU_* tables
#   2. OFFLOAD=ON              -> OFFLOAD_FLAGS verbatim
#   3. OFFLOAD_FLAGS non-empty -> OFFLOAD_FLAGS (the vendor value is ignored)
#   4. otherwise               -> OMP_FLAGS_OFFLOAD_* tables selected by <VENDOR>[:<ARCH>]
# Every offload path (2-4) registers the OMP_TARGET_GPU definition.
#
# This is a macro, so every variable set here (COMPILER, ARCH, OMP_FLAGS, OFFLOAD_TUPLE, LEN,
# OFFLOAD_VENDOR, OFFLOAD_ARCH, CMAKE_CXX_STANDARD) is set in the caller's scope.
# Fails with FATAL_ERROR when OFFLOAD contains more than one `:`.
macro(setup)
    set(CMAKE_CXX_STANDARD 17)
    find_package(OpenMP REQUIRED)
    register_link_library(OpenMP::OpenMP_CXX)

    # Expansions are quoted so that an empty compiler id or processor name yields an empty
    # string rather than a malformed string(TOUPPER) call.
    string(TOUPPER "${CMAKE_CXX_COMPILER_ID}" COMPILER)
    if (NOT ARCH)
        string(TOUPPER "${CMAKE_SYSTEM_PROCESSOR}" ARCH)
    else ()
        message(STATUS "Using custom arch: ${ARCH}")
    endif ()

    if (("${OFFLOAD}" STREQUAL "OFF") OR (NOT DEFINED OFFLOAD))
        # no offload
        # resolve the CPU specific flags
        # starting with ${COMPILER_VENDOR}_${PLATFORM_ARCH}, then try ${COMPILER_VENDOR}, and then give up
        register_append_compiler_and_arch_specific_cxx_flags(
                OMP_FLAGS_CPU
                "${COMPILER}"
                "${ARCH}"
        )
    elseif ("${OFFLOAD}" STREQUAL "ON")
        # offload but with custom flags
        register_definitions(OMP_TARGET_GPU)
        # turn the space separated flag string into a CMake list
        separate_arguments(OFFLOAD_FLAGS)
        set(OMP_FLAGS ${OFFLOAD_FLAGS})
    elseif ((DEFINED OFFLOAD) AND OFFLOAD_FLAGS)
        # offload but OFFLOAD_FLAGS overrides
        register_definitions(OMP_TARGET_GPU)
        separate_arguments(OFFLOAD_FLAGS)
        # list() takes the sub-command first, then the list variable
        list(APPEND OMP_FLAGS ${OFFLOAD_FLAGS})
    else ()
        # handle the vendor:arch value
        # Mirror the other offload branches so the vendor shorthand gets the same definition.
        # NOTE(review): assumes OMP_TARGET_GPU is what enables the offload code path - confirm.
        register_definitions(OMP_TARGET_GPU)

        string(REPLACE ":" ";" OFFLOAD_TUPLE "${OFFLOAD}")
        list(LENGTH OFFLOAD_TUPLE LEN)
        if (LEN EQUAL 1)
            # offload with <vendor> tuple
            list(GET OFFLOAD_TUPLE 0 OFFLOAD_VENDOR)
            # append OMP_FLAGS_OFFLOAD_<vendor> if exists (expands to nothing otherwise)
            list(APPEND OMP_FLAGS ${OMP_FLAGS_OFFLOAD_${OFFLOAD_VENDOR}})
        elseif (LEN EQUAL 2)
            # offload with <vendor:arch> tuple
            list(GET OFFLOAD_TUPLE 0 OFFLOAD_VENDOR)
            list(GET OFFLOAD_TUPLE 1 OFFLOAD_ARCH)
            # append OMP_FLAGS_OFFLOAD_<compiler>_<vendor> if exists (expands to nothing otherwise)
            list(APPEND OMP_FLAGS ${OMP_FLAGS_OFFLOAD_${COMPILER}_${OFFLOAD_VENDOR}})
            # append the offload arch only if OMP_FLAGS_OFFLOAD_<compiler>_ARCH_FLAG exists;
            # the table entry is a prefix and the arch is concatenated directly onto it
            if (DEFINED OMP_FLAGS_OFFLOAD_${COMPILER}_ARCH_FLAG)
                list(APPEND OMP_FLAGS
                        "${OMP_FLAGS_OFFLOAD_${COMPILER}_ARCH_FLAG}${OFFLOAD_ARCH}")
            endif ()
        else ()
            message(FATAL_ERROR "Unrecognised OFFLOAD format: `${OFFLOAD}`, consider directly using OFFLOAD_FLAGS")
        endif ()
    endif ()

    message(STATUS "OMP CXX flags : ${OMP_FLAGS}")
    # OMP_LINK_FLAGS is not assigned anywhere in this file; it is printed as-is.
    message(STATUS "OMP Link flags : ${OMP_LINK_FLAGS}")

    register_append_cxx_flags(ANY ${OMP_FLAGS})
    # propagate flags to linker so that it links with the offload stuff as well
    if (OFFLOAD_APPEND_LINK_FLAG)
        register_append_link_flags(${OMP_FLAGS})
    endif ()
endmacro()