Merge remote-tracking branch 'upstream/master' into bare_hc
Conflicts: CMakeLists.txt
This commit is contained in:
commit
62ea5e3ed6
19
.gitignore
vendored
19
.gitignore
vendored
@ -1,18 +1,19 @@
|
|||||||
|
|
||||||
gpu-stream-cuda
|
cuda-stream
|
||||||
gpu-stream-ocl
|
ocl-stream
|
||||||
gpu-stream-acc
|
omp-stream
|
||||||
gpu-stream-omp3
|
acc-stream
|
||||||
gpu-stream-omp45
|
raja-stream
|
||||||
gpu-stream-sycl
|
kokkos-stream
|
||||||
|
sycl-stream
|
||||||
|
hip-stream
|
||||||
|
|
||||||
*.o
|
*.o
|
||||||
|
*.bc
|
||||||
|
*.sycl
|
||||||
*.tar
|
*.tar
|
||||||
*.gz
|
*.gz
|
||||||
|
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
|
||||||
CMakeCache.txt
|
|
||||||
CMakeFiles/
|
|
||||||
cmake_install.cmake
|
|
||||||
Makefile
|
Makefile
|
||||||
|
|||||||
238
CMakeLists.txt
238
CMakeLists.txt
@ -1,238 +0,0 @@
|
|||||||
|
|
||||||
cmake_minimum_required(VERSION 3.2)
|
|
||||||
|
|
||||||
if(NOT DEFINED HIP_PATH)
|
|
||||||
if(NOT DEFINED ENV{HIP_PATH})
|
|
||||||
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
|
|
||||||
else()
|
|
||||||
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
|
|
||||||
|
|
||||||
if(NOT DEFINED HCC_PATH)
|
|
||||||
if(NOT DEFINED ENV{HCC_PATH})
|
|
||||||
set(HCC_PATH "/opt/rocm/" CACHE PATH "Path to which HCC has been installed")
|
|
||||||
else()
|
|
||||||
set(HCC_PATH $ENV{HCC_PATH} CACHE PATH "Path to which HCC has been installed")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
||||||
|
|
||||||
project(gpu-stream)
|
|
||||||
|
|
||||||
include(CheckIncludeFileCXX)
|
|
||||||
include(CheckCXXCompilerFlag)
|
|
||||||
|
|
||||||
set(gpu-stream_VERSION_MAJOR 2)
|
|
||||||
set(gpu-stream_VERSION_MINOR 2)
|
|
||||||
|
|
||||||
configure_file(common.h.in common.h)
|
|
||||||
include_directories(${CMAKE_BINARY_DIR})
|
|
||||||
|
|
||||||
# Use 'Release' if no build type specified
|
|
||||||
if (NOT CMAKE_BUILD_TYPE)
|
|
||||||
message("No CMAKE_BUILD_TYPE specified, defaulting to 'Release'")
|
|
||||||
set(CMAKE_BUILD_TYPE "Release")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# If using the Cray compiler, manually add the C++11 flag because setting the
|
|
||||||
# standard through CMake as above doesn't set this flag with Cray
|
|
||||||
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray")
|
|
||||||
list(APPEND CMAKE_CXX_FLAGS -hstd=c++11)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# HIP
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
find_package(HIP QUIET)
|
|
||||||
if(${HIP_FOUND})
|
|
||||||
list(APPEND HIP_HIPCC_FLAGS --std=c++11)
|
|
||||||
hip_add_executable(gpu-stream-hip main.cpp HIPStream.cu)
|
|
||||||
target_compile_definitions(gpu-stream-hip PUBLIC HIP)
|
|
||||||
else()
|
|
||||||
message("Skipping HIP...")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# CUDA
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
find_package(CUDA 7.0 QUIET)
|
|
||||||
set(FLAG True)
|
|
||||||
if ("${CMAKE_SYSTEM_NAME}" MATCHES "Darwin")
|
|
||||||
execute_process(COMMAND xcodebuild -version COMMAND head -n 1 OUTPUT_VARIABLE XCODE_VERSION)
|
|
||||||
if ("${XCODE_VERSION}" MATCHES "Xcode 7.3.1")
|
|
||||||
message("Xcode version not supported by CUDA")
|
|
||||||
set(FLAG False)
|
|
||||||
endif ()
|
|
||||||
endif ()
|
|
||||||
if (${FLAG} AND ${CUDA_FOUND})
|
|
||||||
list(APPEND CUDA_NVCC_FLAGS --std=c++11)
|
|
||||||
cuda_add_executable(gpu-stream-cuda main.cpp CUDAStream.cu)
|
|
||||||
target_compile_definitions(gpu-stream-cuda PUBLIC CUDA)
|
|
||||||
else ()
|
|
||||||
message("Skipping CUDA...")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# OpenCL
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
find_package(OpenCL QUIET)
|
|
||||||
if (${OpenCL_FOUND})
|
|
||||||
add_executable(gpu-stream-ocl main.cpp OCLStream.cpp)
|
|
||||||
target_compile_definitions(gpu-stream-ocl PUBLIC OCL)
|
|
||||||
target_link_libraries(gpu-stream-ocl ${OpenCL_LIBRARY})
|
|
||||||
else ()
|
|
||||||
message("Skipping OpenCL...")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# OpenACC
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# Check compiler supports an OpenACC flag
|
|
||||||
include(CheckCXXCompilerFlag)
|
|
||||||
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
|
|
||||||
CHECK_CXX_COMPILER_FLAG(-fopenacc OPENACC)
|
|
||||||
if (OPENACC)
|
|
||||||
list (APPEND CMAKE_EXE_LINKER_FLAGS -fopenacc)
|
|
||||||
endif ()
|
|
||||||
elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "PGI")
|
|
||||||
CHECK_CXX_COMPILER_FLAG(-acc OPENACC)
|
|
||||||
elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray")
|
|
||||||
CHECK_CXX_COMPILER_FLAG(-hacc=openacc OPENACC)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (OPENACC)
|
|
||||||
add_executable(gpu-stream-acc main.cpp ACCStream.cpp)
|
|
||||||
target_compile_definitions(gpu-stream-acc PUBLIC ACC)
|
|
||||||
else ()
|
|
||||||
message("Skipping OpenACC...")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# OpenMP 3.0
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
find_package(OpenMP QUIET)
|
|
||||||
if (${OpenMP_FOUND})
|
|
||||||
add_executable(gpu-stream-omp3 main.cpp OMP3Stream.cpp)
|
|
||||||
target_compile_definitions(gpu-stream-omp3 PUBLIC OMP3)
|
|
||||||
else ()
|
|
||||||
message("Skipping OpenMP 3...")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# OpenMP 4.5
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray")
|
|
||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.5)
|
|
||||||
add_executable(gpu-stream-omp45 main.cpp OMP45Stream.cpp)
|
|
||||||
target_compile_definitions(gpu-stream-omp45 PUBLIC OMP45)
|
|
||||||
endif ()
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# RAJA
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
if (RAJA_PATH)
|
|
||||||
find_package(OpenMP)
|
|
||||||
find_package(CUDA 7.5)
|
|
||||||
list(APPEND CUDA_NVCC_FLAGS "-arch compute_35")
|
|
||||||
list(APPEND CUDA_NVCC_FLAGS --expt-extended-lambda)
|
|
||||||
list(APPEND CUDA_NVCC_FLAGS -Xcompiler ${OpenMP_CXX_FLAGS})
|
|
||||||
list(APPEND CUDA_NVCC_FLAGS -DUSE_RAJA)
|
|
||||||
cuda_include_directories(${RAJA_PATH}/include)
|
|
||||||
set_source_files_properties(RAJAStream.cpp PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
|
|
||||||
cuda_add_executable(gpu-stream-raja main.cpp RAJAStream.cpp)
|
|
||||||
target_compile_definitions(gpu-stream-raja PUBLIC USE_RAJA)
|
|
||||||
target_link_libraries(gpu-stream-raja "-L${RAJA_PATH}/lib -lRAJA")
|
|
||||||
else()
|
|
||||||
message("Skipping RAJA... (use -DRAJA_PATH=/path/to/raja to opt in)")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# Kokkos
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
if (KOKKOS_PATH)
|
|
||||||
if ("${CMAKE_SYSTEM_NAME}" MATCHES "Linux")
|
|
||||||
add_custom_target(gpu-stream-kokkos COMMAND make -f KokkosMakefile KOKKOS_PATH=${KOKKOS_PATH})
|
|
||||||
else()
|
|
||||||
message("Skipping Kokkos (requires Linux)")
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
message("Skipping Kokkos... (use -DKOKKOS_PATH=/path/to/kokkos to opt in)")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# HCC
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
find_program(HCC_BINARY hcc HINTS ${HCC_PATH}/bin PATHS ${HCC_PATH}/bin)
|
|
||||||
if(EXISTS ${HCC_BINARY})
|
|
||||||
#can the following be tied to the target only?
|
|
||||||
set(OLD_CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
|
|
||||||
set(CMAKE_CXX_COMPILER ${HCC_BINARY})
|
|
||||||
|
|
||||||
add_executable(gpu-stream-hc main.cpp HCStream.cpp)
|
|
||||||
set_target_properties(gpu-stream-hc PROPERTIES
|
|
||||||
COMPILE_FLAGS "-hc -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include"
|
|
||||||
LINK_FLAGS "-hc -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive"
|
|
||||||
)
|
|
||||||
target_compile_definitions(gpu-stream-hc PUBLIC HC)
|
|
||||||
|
|
||||||
set_property(TARGET gpu-stream-ocl APPEND PROPERTY COMPILE_FLAGS "-I/opt/rocm/opencl/include/ -stdlib=libstdc++")
|
|
||||||
set_property(TARGET gpu-stream-ocl APPEND PROPERTY LINK_FLAGS "-L/opt/rocm/opencl/lib/x86_64 -lstdc++ ")
|
|
||||||
|
|
||||||
set_property(TARGET gpu-stream-omp3 APPEND PROPERTY COMPILE_FLAGS "-I/opt/rocm/opencl/include/ -stdlib=libstdc++")
|
|
||||||
set_property(TARGET gpu-stream-omp3 APPEND PROPERTY LINK_FLAGS "-L/opt/rocm/opencl/lib/x86_64 -lstdc++ ")
|
|
||||||
|
|
||||||
else()
|
|
||||||
message("Skipping HC...")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# SYCL
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" OR
|
|
||||||
"${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
|
|
||||||
# Use C++14 if available, otherwise drop back to C++11
|
|
||||||
check_cxx_compiler_flag("-std=c++14" CXX14)
|
|
||||||
if (CXX14)
|
|
||||||
set(CMAKE_REQUIRED_FLAGS "-std=c++14")
|
|
||||||
else()
|
|
||||||
set(CMAKE_REQUIRED_FLAGS "-std=c++11")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
check_include_file_cxx("CL/sycl.hpp" HAS_SYCL)
|
|
||||||
if (HAS_SYCL)
|
|
||||||
add_executable(gpu-stream-sycl main.cpp SYCLStream.cpp)
|
|
||||||
target_compile_definitions(gpu-stream-sycl PUBLIC SYCL)
|
|
||||||
|
|
||||||
# The user must define this in order to use FindComputeCpp
|
|
||||||
if (COMPUTECPP_PACKAGE_ROOT_DIR)
|
|
||||||
message(STATUS "Using ComputeCpp for SYCL compilation")
|
|
||||||
include(FindComputeCpp)
|
|
||||||
|
|
||||||
include_directories(${COMPUTECPP_INCLUDE_DIRECTORY})
|
|
||||||
|
|
||||||
set(SOURCE_NAME "SYCLStream")
|
|
||||||
|
|
||||||
target_compile_options(gpu-stream-sycl PUBLIC ${HOST_COMPILER_OPTIONS})
|
|
||||||
add_sycl_to_target(gpu-stream-sycl ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_NAME}.cpp
|
|
||||||
${CMAKE_CURRENT_BINARY_DIR})
|
|
||||||
else()
|
|
||||||
message(STATUS "Using header-only SYCL implementation")
|
|
||||||
set_property(TARGET gpu-stream-sycl PROPERTY CXX_STANDARD 14)
|
|
||||||
endif()
|
|
||||||
else ()
|
|
||||||
message("Skipping SYCL...")
|
|
||||||
endif (HAS_SYCL)
|
|
||||||
8
CUDA.make
Normal file
8
CUDA.make
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
cuda-stream: main.cpp CUDAStream.cu
|
||||||
|
nvcc -std=c++11 -O3 -DCUDA $^ $(EXTRA_FLAGS) -o $@
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -f cuda-stream
|
||||||
|
|
||||||
18
HIP.make
Normal file
18
HIP.make
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
|
||||||
|
# TODO: HIP with HCC
|
||||||
|
|
||||||
|
HIPCC = hipcc
|
||||||
|
|
||||||
|
ifndef CUDA_PATH
|
||||||
|
ifeq (,$(wildcard /usr/local/bin/nvcc))
|
||||||
|
$(error /usr/local/bin/nvcc not found, set CUDA_PATH instead)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
hip-stream: main.cpp HIPStream.cu
|
||||||
|
$(HIPCC) $(CXXFLAGS) -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -f hip-stream
|
||||||
|
|
||||||
49
HIPStream.cu
49
HIPStream.cu
@ -9,6 +9,7 @@
|
|||||||
#include "hip/hip_runtime.h"
|
#include "hip/hip_runtime.h"
|
||||||
|
|
||||||
#define TBSIZE 1024
|
#define TBSIZE 1024
|
||||||
|
#define DOT_NUM_BLOCKS 256
|
||||||
|
|
||||||
void check_error(void)
|
void check_error(void)
|
||||||
{
|
{
|
||||||
@ -47,6 +48,9 @@ HIPStream<T>::HIPStream(const unsigned int ARRAY_SIZE, const int device_index)
|
|||||||
|
|
||||||
array_size = ARRAY_SIZE;
|
array_size = ARRAY_SIZE;
|
||||||
|
|
||||||
|
// Allocate the host array for partial sums for dot kernels
|
||||||
|
sums = (T*)malloc(sizeof(T) * DOT_NUM_BLOCKS);
|
||||||
|
|
||||||
// Check buffers fit on the device
|
// Check buffers fit on the device
|
||||||
hipDeviceProp_t props;
|
hipDeviceProp_t props;
|
||||||
hipGetDeviceProperties(&props, 0);
|
hipGetDeviceProperties(&props, 0);
|
||||||
@ -60,6 +64,8 @@ HIPStream<T>::HIPStream(const unsigned int ARRAY_SIZE, const int device_index)
|
|||||||
check_error();
|
check_error();
|
||||||
hipMalloc(&d_c, ARRAY_SIZE*sizeof(T));
|
hipMalloc(&d_c, ARRAY_SIZE*sizeof(T));
|
||||||
check_error();
|
check_error();
|
||||||
|
hipMalloc(&d_sum, DOT_NUM_BLOCKS*sizeof(T));
|
||||||
|
check_error();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -172,6 +178,49 @@ void HIPStream<T>::triad()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
__global__ void dot_kernel(hipLaunchParm lp, const T * a, const T * b, T * sum, unsigned int array_size)
|
||||||
|
{
|
||||||
|
|
||||||
|
extern __shared__ __align__(sizeof(T)) unsigned char smem[];
|
||||||
|
T *tb_sum = reinterpret_cast<T*>(smem);
|
||||||
|
|
||||||
|
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
|
const size_t local_i = threadIdx.x;
|
||||||
|
|
||||||
|
tb_sum[local_i] = 0.0;
|
||||||
|
for (; i < array_size; i += blockDim.x*gridDim.x)
|
||||||
|
tb_sum[local_i] += a[i] * b[i];
|
||||||
|
|
||||||
|
for (int offset = blockDim.x / 2; offset > 0; offset /= 2)
|
||||||
|
{
|
||||||
|
__syncthreads();
|
||||||
|
if (local_i < offset)
|
||||||
|
{
|
||||||
|
tb_sum[local_i] += tb_sum[local_i+offset];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (local_i == 0)
|
||||||
|
sum[blockIdx.x] = tb_sum[local_i];
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
T HIPStream<T>::dot()
|
||||||
|
{
|
||||||
|
hipLaunchKernel(HIP_KERNEL_NAME(dot_kernel), dim3(DOT_NUM_BLOCKS), dim3(TBSIZE), sizeof(T)*TBSIZE, 0, d_a, d_b, d_sum, array_size);
|
||||||
|
check_error();
|
||||||
|
|
||||||
|
hipMemcpy(sums, d_sum, DOT_NUM_BLOCKS*sizeof(T), hipMemcpyDeviceToHost);
|
||||||
|
check_error();
|
||||||
|
|
||||||
|
T sum = 0.0;
|
||||||
|
for (int i = 0; i < DOT_NUM_BLOCKS; i++)
|
||||||
|
sum += sums[i];
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
void listDevices(void)
|
void listDevices(void)
|
||||||
{
|
{
|
||||||
// Get number of devices
|
// Get number of devices
|
||||||
|
|||||||
@ -21,10 +21,15 @@ class HIPStream : public Stream<T>
|
|||||||
protected:
|
protected:
|
||||||
// Size of arrays
|
// Size of arrays
|
||||||
unsigned int array_size;
|
unsigned int array_size;
|
||||||
|
|
||||||
|
// Host array for partial sums for dot kernel
|
||||||
|
T *sums;
|
||||||
|
|
||||||
// Device side pointers to arrays
|
// Device side pointers to arrays
|
||||||
T *d_a;
|
T *d_a;
|
||||||
T *d_b;
|
T *d_b;
|
||||||
T *d_c;
|
T *d_c;
|
||||||
|
T *d_sum;
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -36,6 +41,7 @@ class HIPStream : public Stream<T>
|
|||||||
virtual void add() override;
|
virtual void add() override;
|
||||||
virtual void mul() override;
|
virtual void mul() override;
|
||||||
virtual void triad() override;
|
virtual void triad() override;
|
||||||
|
virtual T dot() override;
|
||||||
|
|
||||||
virtual void init_arrays(T initA, T initB, T initC) override;
|
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||||
|
|||||||
30
Kokkos.make
Normal file
30
Kokkos.make
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
|
||||||
|
default: kokkos-stream
|
||||||
|
|
||||||
|
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||||
|
|
||||||
|
ifndef TARGET
|
||||||
|
define target_help
|
||||||
|
Set TARGET to change to offload device. Defaulting to CPU.
|
||||||
|
Available targets are:
|
||||||
|
CPU (default)
|
||||||
|
GPU
|
||||||
|
endef
|
||||||
|
$(info $(target_help))
|
||||||
|
TARGET=CPU
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), CPU)
|
||||||
|
TARGET_DEF = -DKOKKOS_TARGET_CPU
|
||||||
|
else ifeq ($(TARGET), GPU)
|
||||||
|
CXX = $(NVCC_WRAPPER)
|
||||||
|
TARGET_DEF =
|
||||||
|
endif
|
||||||
|
|
||||||
|
kokkos-stream: main.cpp KOKKOSStream.cpp $(KOKKOS_CPP_DEPENDS)
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(KOKKOS_LDFLAGS) main.cpp KOKKOSStream.cpp $(KOKKOS_LIBS) -o $@ -DKOKKOS $(TARGET_DEF) -O3 $(EXTRA_FLAGS)
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -f kokkos-stream
|
||||||
|
|
||||||
@ -1,10 +0,0 @@
|
|||||||
|
|
||||||
default: gpu-stream-kokkos
|
|
||||||
|
|
||||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
|
||||||
|
|
||||||
gpu-stream-kokkos: main.o KOKKOSStream.o
|
|
||||||
$(CXX) $(KOKKOS_LDFLAGS) $^ $(KOKKOS_LIBS) -o $@ -DKOKKOS -DKOKKOS_TARGET_CPU -O3
|
|
||||||
|
|
||||||
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) -c $< -DKOKKOS -DKOKKOS_TARGET_CPU -O3
|
|
||||||
@ -1,11 +0,0 @@
|
|||||||
|
|
||||||
default: gpu-stream-kokkos
|
|
||||||
|
|
||||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
|
||||||
|
|
||||||
gpu-stream-kokkos: main.o KOKKOSStream.o
|
|
||||||
$(CXX) $(KOKKOS_LDFLAGS) $^ $(KOKKOS_LIBS) -o $@ -DKOKKOS -O3
|
|
||||||
|
|
||||||
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
|
|
||||||
$(NVCC_WRAPPER) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) -c $< -DKOKKOS -O3
|
|
||||||
|
|
||||||
52
OpenACC.make
Normal file
52
OpenACC.make
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
|
||||||
|
ifndef COMPILER
|
||||||
|
define compiler_help
|
||||||
|
Set COMPILER to ensure correct flags are set.
|
||||||
|
Available compilers are:
|
||||||
|
PGI CRAY
|
||||||
|
endef
|
||||||
|
$(info $(compiler_help))
|
||||||
|
endif
|
||||||
|
|
||||||
|
COMPILER_ = $(CXX)
|
||||||
|
COMPILER_PGI = pgc++
|
||||||
|
COMPILER_CRAY = CC
|
||||||
|
|
||||||
|
FLAGS_ = -O3 -std=c++11
|
||||||
|
|
||||||
|
FLAGS_PGI = -std=c++11 -O3 -acc
|
||||||
|
ifeq ($(COMPILER), PGI)
|
||||||
|
define target_help
|
||||||
|
Set a TARGET to ensure PGI targets the correct offload device.
|
||||||
|
Available targets are:
|
||||||
|
SNB, IVB, HSW
|
||||||
|
KEPLER, MAXWELL, PASCAL
|
||||||
|
HAWAII
|
||||||
|
endef
|
||||||
|
ifndef TARGET
|
||||||
|
$(error $(target_help))
|
||||||
|
endif
|
||||||
|
TARGET_FLAGS_SNB = -ta=multicore -tp=sandybridge
|
||||||
|
TARGET_FLAGS_IVB = -ta=multicore -tp=ivybridge
|
||||||
|
TARGET_FLAGS_HSW = -ta=multicore -tp=haswell
|
||||||
|
TARGET_FLAGS_KEPLER = -ta=nvidia:cc35
|
||||||
|
TARGET_FLAGS_MAXWELL = -ta=nvidia:cc50
|
||||||
|
TARGET_FLAGS_PASCAL = -ta=nvidia:cc60
|
||||||
|
TARGET_FLAGS_HAWAII = -ta=radeon:hawaii
|
||||||
|
ifeq ($(TARGET_FLAGS_$(TARGET)),)
|
||||||
|
$(error $(target_help))
|
||||||
|
endif
|
||||||
|
|
||||||
|
FLAGS_PGI += $(TARGET_FLAGS_$(TARGET))
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
FLAGS_CRAY = -hstd=c++11
|
||||||
|
CXXFLAGS = $(FLAGS_$(COMPILER))
|
||||||
|
|
||||||
|
acc-stream: main.cpp ACCStream.cpp
|
||||||
|
$(COMPILER_$(COMPILER)) $(CXXFLAGS) -DACC $^ $(EXTRA_FLAGS) -o $@
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -f acc-stream main.o ACCStream.o
|
||||||
39
OpenCL.make
Normal file
39
OpenCL.make
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
|
||||||
|
ifndef COMPILER
|
||||||
|
define compiler_help
|
||||||
|
Set COMPILER to change flags (defaulting to GNU).
|
||||||
|
Available compilers are:
|
||||||
|
GNU CLANG INTEL CRAY
|
||||||
|
|
||||||
|
endef
|
||||||
|
$(info $(compiler_help))
|
||||||
|
COMPILER=GNU
|
||||||
|
endif
|
||||||
|
|
||||||
|
COMPILER_GNU = g++
|
||||||
|
COMPILER_CLANG = clang++
|
||||||
|
COMPILER_INTEL = icpc
|
||||||
|
COMPILER_CRAY = CC
|
||||||
|
CXX = $(COMPILER_$(COMPILER))
|
||||||
|
|
||||||
|
FLAGS_ = -O3 -std=c++11
|
||||||
|
FLAGS_GNU = -O3 -std=c++11
|
||||||
|
FLAGS_CLANG = -O3 -std=c++11
|
||||||
|
FLAGS_INTEL = -O3 -std=c++11
|
||||||
|
FLAGS_CRAY = -O3 -hstd=c++11
|
||||||
|
CXXFLAGS=$(FLAGS_$(COMPILER))
|
||||||
|
|
||||||
|
PLATFORM = $(shell uname -s)
|
||||||
|
ifeq ($(PLATFORM), Darwin)
|
||||||
|
LIBS = -framework OpenCL
|
||||||
|
else
|
||||||
|
LIBS = -lOpenCL
|
||||||
|
endif
|
||||||
|
|
||||||
|
ocl-stream: main.cpp OCLStream.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) -DOCL $^ $(EXTRA_FLAGS) $(LIBS) -o $@
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -f ocl-stream
|
||||||
|
|
||||||
60
OpenMP.make
Normal file
60
OpenMP.make
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
|
||||||
|
ifndef COMPILER
|
||||||
|
define compiler_help
|
||||||
|
Set COMPILER to change flags (defaulting to GNU).
|
||||||
|
Available compilers are:
|
||||||
|
CLANG CRAY GNU INTEL XL
|
||||||
|
|
||||||
|
endef
|
||||||
|
$(info $(compiler_help))
|
||||||
|
COMPILER=GNU
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef TARGET
|
||||||
|
define target_help
|
||||||
|
Set TARGET to change device (defaulting to CPU).
|
||||||
|
Available targets are:
|
||||||
|
CPU NVIDIA
|
||||||
|
|
||||||
|
endef
|
||||||
|
$(info $(target_help))
|
||||||
|
TARGET=CPU
|
||||||
|
endif
|
||||||
|
|
||||||
|
COMPILER_GNU = g++
|
||||||
|
COMPILER_INTEL = icpc
|
||||||
|
COMPILER_CRAY = CC
|
||||||
|
COMPILER_CLANG = clang++
|
||||||
|
COMPILER_XL = xlc++
|
||||||
|
CXX = $(COMPILER_$(COMPILER))
|
||||||
|
|
||||||
|
FLAGS_GNU = -O3 -std=c++11
|
||||||
|
FLAGS_INTEL = -O3 -std=c++11 -xHOST
|
||||||
|
FLAGS_CRAY = -O3 -hstd=c++11
|
||||||
|
FLAGS_CLANG = -O3 -std=c++11
|
||||||
|
FLAGS_XL = -O5 -qarch=pwr8 -qtune=pwr8 -std=c++11
|
||||||
|
CXXFLAGS = $(FLAGS_$(COMPILER))
|
||||||
|
|
||||||
|
# OpenMP flags for CPUs
|
||||||
|
OMP_GNU_CPU = -fopenmp
|
||||||
|
OMP_INTEL_CPU = -qopenmp
|
||||||
|
OMP_CRAY_CPU = -homp
|
||||||
|
OMP_CLANG_CPU = -fopenmp=libomp
|
||||||
|
OMP_XL_CPU = -qsmp=omp -qthreaded
|
||||||
|
|
||||||
|
# OpenMP flags for NVIDIA
|
||||||
|
OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU
|
||||||
|
OMP_CLANG_NVIDIA = -DOMP_TARGET_GPU -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda
|
||||||
|
|
||||||
|
ifndef OMP_$(COMPILER)_$(TARGET)
|
||||||
|
$(error Targeting $(TARGET) with $(COMPILER) not supported)
|
||||||
|
endif
|
||||||
|
|
||||||
|
OMP = $(OMP_$(COMPILER)_$(TARGET))
|
||||||
|
|
||||||
|
omp-stream: main.cpp OMPStream.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) -DOMP $^ $(OMP) $(EXTRA_FLAGS) -o $@
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -f omp-stream
|
||||||
58
RAJA.make
Normal file
58
RAJA.make
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
|
||||||
|
ifndef TARGET
|
||||||
|
define target_help
|
||||||
|
Set TARGET to change to offload device. Defaulting to CPU.
|
||||||
|
Available targets are:
|
||||||
|
CPU (default)
|
||||||
|
GPU
|
||||||
|
endef
|
||||||
|
$(info $(target_help))
|
||||||
|
TARGET=CPU
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), CPU)
|
||||||
|
|
||||||
|
ifndef COMPILER
|
||||||
|
define compiler_help
|
||||||
|
Set COMPILER to change flags (defaulting to GNU).
|
||||||
|
Available compilers are:
|
||||||
|
INTEL GNU CRAY XL
|
||||||
|
endef
|
||||||
|
$(info $(compiler_help))
|
||||||
|
COMPILER=GNU
|
||||||
|
endif
|
||||||
|
|
||||||
|
CXX_INTEL = icpc
|
||||||
|
CXX_GNU = g++
|
||||||
|
CXX_CRAY = CC
|
||||||
|
CXX_XL = xlc++
|
||||||
|
|
||||||
|
CXXFLAGS_INTEL = -O3 -std=c++11 -qopenmp
|
||||||
|
CXXFLAGS_GNU = -O3 -std=c++11 -fopenmp
|
||||||
|
CXXFLAGS_CRAY = -O3 -hstd=c++11
|
||||||
|
CXXFLAGS_XL = -O5 -std=c++11 -qarch=pwr8 -qtune=pwr8 -qsmp=omp -qthreaded
|
||||||
|
|
||||||
|
CXX = $(CXX_$(COMPILER))
|
||||||
|
CXXFLAGS = -DRAJA_TARGET_CPU $(CXXFLAGS_$(COMPILER))
|
||||||
|
|
||||||
|
else ifeq ($(TARGET), GPU)
|
||||||
|
CXX = nvcc
|
||||||
|
|
||||||
|
ifndef ARCH
|
||||||
|
define arch_help
|
||||||
|
Set ARCH to ensure correct GPU architecture.
|
||||||
|
Example:
|
||||||
|
ARCH=sm_35
|
||||||
|
endef
|
||||||
|
$(error $(arch_help))
|
||||||
|
endif
|
||||||
|
CXXFLAGS = --expt-extended-lambda -O3 -std=c++11 -x cu -Xcompiler -fopenmp -arch $(ARCH)
|
||||||
|
endif
|
||||||
|
|
||||||
|
raja-stream: main.cpp RAJAStream.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) -DUSE_RAJA -I$(RAJA_PATH)/include $^ $(EXTRA_FLAGS) -L$(RAJA_PATH)/lib -lRAJA -o $@
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -f raja-stream
|
||||||
|
|
||||||
36
README.android
Normal file
36
README.android
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
Android (outdated instructions)
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Assuming you have a recent Android NDK available, you can use the
|
||||||
|
toolchain that it provides to build GPU-STREAM. You should first
|
||||||
|
use the NDK to generate a standalone toolchain:
|
||||||
|
|
||||||
|
# Select a directory to install the toolchain to
|
||||||
|
ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain
|
||||||
|
|
||||||
|
${NDK}/build/tools/make-standalone-toolchain.sh \
|
||||||
|
--platform=android-14 \
|
||||||
|
--toolchain=arm-linux-androideabi-4.8 \
|
||||||
|
--install-dir=${ANDROID_NATIVE_TOOLCHAIN}
|
||||||
|
|
||||||
|
Make sure that the OpenCL headers and library (libOpenCL.so) are
|
||||||
|
available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`.
|
||||||
|
|
||||||
|
You should then be able to build GPU-STREAM:
|
||||||
|
|
||||||
|
make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++
|
||||||
|
|
||||||
|
Copy the executable and OpenCL kernels to the device:
|
||||||
|
|
||||||
|
adb push gpu-stream-ocl /data/local/tmp
|
||||||
|
adb push ocl-stream-kernels.cl /data/local/tmp
|
||||||
|
|
||||||
|
Run GPU-STREAM from an adb shell:
|
||||||
|
|
||||||
|
adb shell
|
||||||
|
cd /data/local/tmp
|
||||||
|
|
||||||
|
# Use float if device doesn't support double, and reduce array size
|
||||||
|
./gpu-stream-ocl --float -n 6 -s 10000000
|
||||||
|
|
||||||
|
|
||||||
43
README.md
43
README.md
@ -23,45 +23,18 @@ Website
|
|||||||
Usage
|
Usage
|
||||||
-----
|
-----
|
||||||
|
|
||||||
CMake 3.2 or above is required.
|
Drivers, compiler and software applicable to whichever implementation you would like to build against is required.
|
||||||
Drivers, compiler and software applicable to whichever implementation you would like to build against. Our build system is designed to only build implementations in programming models that your system supports.
|
|
||||||
|
|
||||||
Generate the Makefile with `cmake .`
|
We have supplied a series of Makefiles, one for each programming model, to assist with building.
|
||||||
|
The Makefiles contain common build options, and should be simple to customise for your needs too.
|
||||||
|
|
||||||
Android (outdated instructions)
|
General usage is `make -f <Model>.make`
|
||||||
------------------
|
Common compiler flags and names can be set by passing a `COMPILER` option to Make, e.g. `make COMPILER=GNU`.
|
||||||
|
Some models allow specifying a CPU or GPU style target, and this can be set by passing a `TARGET` option to Make, e.g. `make TARGET=GPU`.
|
||||||
|
|
||||||
Assuming you have a recent Android NDK available, you can use the
|
Pass in extra flags via the `EXTRA_FLAGS` option.
|
||||||
toolchain that it provides to build GPU-STREAM. You should first
|
|
||||||
use the NDK to generate a standalone toolchain:
|
|
||||||
|
|
||||||
# Select a directory to install the toolchain to
|
The binaries are named in the form `<model>-stream`.
|
||||||
ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain
|
|
||||||
|
|
||||||
${NDK}/build/tools/make-standalone-toolchain.sh \
|
|
||||||
--platform=android-14 \
|
|
||||||
--toolchain=arm-linux-androideabi-4.8 \
|
|
||||||
--install-dir=${ANDROID_NATIVE_TOOLCHAIN}
|
|
||||||
|
|
||||||
Make sure that the OpenCL headers and library (libOpenCL.so) are
|
|
||||||
available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`.
|
|
||||||
|
|
||||||
You should then be able to build GPU-STREAM:
|
|
||||||
|
|
||||||
make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++
|
|
||||||
|
|
||||||
Copy the executable and OpenCL kernels to the device:
|
|
||||||
|
|
||||||
adb push gpu-stream-ocl /data/local/tmp
|
|
||||||
adb push ocl-stream-kernels.cl /data/local/tmp
|
|
||||||
|
|
||||||
Run GPU-STREAM from an adb shell:
|
|
||||||
|
|
||||||
adb shell
|
|
||||||
cd /data/local/tmp
|
|
||||||
|
|
||||||
# Use float if device doesn't support double, and reduce array size
|
|
||||||
./gpu-stream-ocl --float -n 6 -s 10000000
|
|
||||||
|
|
||||||
|
|
||||||
Results
|
Results
|
||||||
|
|||||||
12
SYCL.make
Normal file
12
SYCL.make
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
COMPUTECPP_FLAGS = $(shell computecpp_info --dump-device-compiler-flags)
|
||||||
|
|
||||||
|
sycl-stream: main.cpp SYCLStream.cpp SYCLStream.sycl
|
||||||
|
$(CXX) -O3 -std=c++11 -DSYCL main.cpp SYCLStream.cpp -include SYCLStream.sycl $(EXTRA_FLAGS) -lComputeCpp -lOpenCL -o $@
|
||||||
|
|
||||||
|
SYCLStream.sycl: SYCLStream.cpp
|
||||||
|
compute++ SYCLStream.cpp $(COMPUTECPP_FLAGS) -c
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean:
|
||||||
|
rm -f sycl-stream SYCLStream.sycl SYCLStream.bc
|
||||||
@ -1,248 +0,0 @@
|
|||||||
#.rst:
|
|
||||||
# FindComputeCpp
|
|
||||||
#---------------
|
|
||||||
#
|
|
||||||
# Copyright 2016 Codeplay Software Ltd.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use these files except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
#########################
|
|
||||||
# FindComputeCpp.cmake
|
|
||||||
#########################
|
|
||||||
#
|
|
||||||
# Tools for finding and building with ComputeCpp.
|
|
||||||
#
|
|
||||||
# User must define COMPUTECPP_PACKAGE_ROOT_DIR pointing to the ComputeCpp
|
|
||||||
# installation.
|
|
||||||
#
|
|
||||||
# Latest version of this file can be found at:
|
|
||||||
# https://github.com/codeplaysoftware/computecpp-sdk
|
|
||||||
|
|
||||||
# Require CMake version 3.2.2 or higher
|
|
||||||
cmake_minimum_required(VERSION 3.2.2)
|
|
||||||
|
|
||||||
# Check that a supported host compiler can be found.
# GCC must be at least 4.8 and Clang at least 3.6; any other compiler only
# triggers a warning.
if(CMAKE_COMPILER_IS_GNUCXX)
  # Require at least gcc 4.8
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
    message(FATAL_ERROR
      "host compiler - Not found! (gcc version must be at least 4.8)")
  # Require the GCC dual ABI to be disabled for 5.1 or higher.
  # `NOT ... VERSION_LESS 5.1` is used instead of `VERSION_GREATER 5.1` so that
  # 5.1 itself is included (VERSION_GREATER_EQUAL needs a newer CMake than the
  # 3.2.2 this module requires).
  elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
    set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True")
    message(STATUS
      "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)")
  else()
    message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}")
  endif()
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
  # Require at least clang 3.6.
  # The variable name is passed unexpanded so an empty version string cannot
  # produce a malformed if() expression.
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.6)
    message(FATAL_ERROR
      "host compiler - Not found! (clang version must be at least 3.6)")
  else()
    message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}")
  endif()
else()
  message(WARNING
    "host compiler - Not found! (ComputeCpp supports GCC and Clang, see readme)")
endif()
|
|
||||||
|
|
||||||
# Device code is compiled in 64 bit mode unless the user turns the option off.
# The option is hidden from the default cache view.
set(COMPUTECPP_64_BIT_DEFAULT ON)
option(COMPUTECPP_64_BIT_CODE
  "Compile device code in 64 bit mode"
  ${COMPUTECPP_64_BIT_DEFAULT})
mark_as_advanced(COMPUTECPP_64_BIT_CODE)

# OpenCL is a hard requirement; configuration stops if it cannot be found.
find_package(OpenCL REQUIRED)
|
|
||||||
|
|
||||||
# Find ComputeCpp package.
# COMPUTECPP_PACKAGE_ROOT_DIR must be provided by the user; configuration
# stops with a fatal error when it is unset or false.
if(NOT COMPUTECPP_PACKAGE_ROOT_DIR)
  message(FATAL_ERROR
    "ComputeCpp package - Not found! (please set COMPUTECPP_PACKAGE_ROOT_DIR)")
else()
  message(STATUS "ComputeCpp package - Found")
endif()
# NOTE(review): option() declares a boolean cache entry, but this variable
# holds a path. Consider set(... CACHE PATH ...) once the callers that set it
# as a normal variable have been checked.
option(COMPUTECPP_PACKAGE_ROOT_DIR "Path to the ComputeCpp Package")
|
|
||||||
|
|
||||||
# Locate the compute++ device compiler, searching the package root and its
# bin/ sub-directory. A missing compiler is fatal.
find_program(COMPUTECPP_DEVICE_COMPILER
  compute++
  PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR}
  PATH_SUFFIXES bin)
if(NOT EXISTS ${COMPUTECPP_DEVICE_COMPILER})
  message(FATAL_ERROR "compute++ - Not found! (${COMPUTECPP_DEVICE_COMPILER})")
else()
  mark_as_advanced(COMPUTECPP_DEVICE_COMPILER)
  message(STATUS "compute++ - Found")
endif()
|
|
||||||
|
|
||||||
# Obtain the path to computecpp_info, searching the package root and its
# bin/ sub-directory. A missing tool is fatal because later steps run it.
find_program(COMPUTECPP_INFO_TOOL computecpp_info PATHS
  ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin)
if(EXISTS ${COMPUTECPP_INFO_TOOL})
  # mark_as_advanced() takes the cache variable's name, not its value.
  mark_as_advanced(COMPUTECPP_INFO_TOOL)
  message(STATUS "computecpp_info - Found")
else()
  message(FATAL_ERROR "computecpp_info - Not found! (${COMPUTECPP_INFO_TOOL})")
endif()
|
|
||||||
|
|
||||||
# Locate the ComputeCpp runtime library inside the package only
# (NO_DEFAULT_PATH keeps system locations out of the search).
find_library(COMPUTECPP_RUNTIME_LIBRARY
  ComputeCpp
  PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR}
  HINTS ${COMPUTECPP_PACKAGE_ROOT_DIR}/lib
  PATH_SUFFIXES lib
  DOC "ComputeCpp Runtime Library"
  NO_DEFAULT_PATH)

# A missing runtime library is fatal.
if(NOT EXISTS ${COMPUTECPP_RUNTIME_LIBRARY})
  message(FATAL_ERROR "libComputeCpp.so - Not found!")
else()
  mark_as_advanced(COMPUTECPP_RUNTIME_LIBRARY)
  message(STATUS "libComputeCpp.so - Found")
endif()
|
|
||||||
|
|
||||||
# The ComputeCpp headers are expected under <package root>/include/.
# Configuration stops if that directory does not exist.
set(COMPUTECPP_INCLUDE_DIRECTORY ${COMPUTECPP_PACKAGE_ROOT_DIR}/include/)
if(EXISTS ${COMPUTECPP_INCLUDE_DIRECTORY})
  message(STATUS "ComputeCpp includes - Found")
else()
  message(FATAL_ERROR "ComputeCpp includes - Not found!")
endif()
|
|
||||||
|
|
||||||
# Ask computecpp_info for the package version at configure time.
# A non-zero exit status from the tool is fatal.
execute_process(
  COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-version"
  OUTPUT_VARIABLE COMPUTECPP_PACKAGE_VERSION
  RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE)
if(COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
  mark_as_advanced(COMPUTECPP_PACKAGE_VERSION)
  message(STATUS "Package version - ${COMPUTECPP_PACKAGE_VERSION}")
else()
  message(FATAL_ERROR "Package version - Error obtaining version!")
endif()
|
|
||||||
|
|
||||||
# Obtain the device compiler flags by running computecpp_info at configure
# time. A non-zero exit status from the tool is fatal.
execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-device-compiler-flags"
  OUTPUT_VARIABLE COMPUTECPP_DEVICE_COMPILER_FLAGS
  RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
  message(FATAL_ERROR "compute++ flags - Error obtaining compute++ flags!")
else()
  # Use the name of the variable that was actually filled in above.
  mark_as_advanced(COMPUTECPP_DEVICE_COMPILER_FLAGS)
  message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}")
endif()
|
|
||||||
|
|
||||||
# Ask computecpp_info whether this platform is supported.
# A failure to run the tool is fatal; an unsupported platform is only
# reported as a status message.
execute_process(
  COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported"
  OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED
  RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE)
if(COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
  mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED)
  if(NOT COMPUTECPP_PLATFORM_IS_SUPPORTED)
    message(STATUS "platform - your system CANNOT support ComputeCpp")
  else()
    message(STATUS "platform - your system can support ComputeCpp")
  endif()
else()
  message(FATAL_ERROR "platform - Error checking platform support!")
endif()
|
|
||||||
|
|
||||||
####################
#   __build_spir
####################
#
#  Adds a custom command that runs compute++ on a source file, wraps its
#  output in a custom target and makes the given target depend on the
#  resulting integration header.
#
#  targetName : Name of the target.
#  sourceFile : Source file to be compiled.
#  binaryDir : Intermediate directory to output the integration header.
#
function(__build_spir targetName sourceFile binaryDir)

  # Retrieve source file name.
  get_filename_component(sourceFileName ${sourceFile} NAME)

  # Set the path to the Sycl file.
  set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl)

  # Add any user-defined include to the device compiler: first the
  # directory-level INCLUDE_DIRECTORIES, then CMAKE_INCLUDE_PATH. Each entry
  # is prepended, so the final list is in reverse order of discovery.
  get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY
    INCLUDE_DIRECTORIES)
  set(device_compiler_includes "")
  foreach(directory ${includeDirectories})
    set(device_compiler_includes "-I${directory}" ${device_compiler_includes})
  endforeach()
  if(CMAKE_INCLUDE_PATH)
    foreach(directory ${CMAKE_INCLUDE_PATH})
      set(device_compiler_includes "-I${directory}"
        ${device_compiler_includes})
    endforeach()
  endif()

  # Convert argument list format (space-separated string -> CMake list) so
  # each flag reaches compute++ as its own argument.
  separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS)

  # Add custom command for running compute++.
  # VERBATIM makes argument escaping identical on every platform, so paths
  # containing spaces or shell metacharacters are passed through intact.
  add_custom_command(
    OUTPUT ${outputSyclFile}
    COMMAND ${COMPUTECPP_DEVICE_COMPILER}
            ${COMPUTECPP_DEVICE_COMPILER_FLAGS}
            -isystem ${COMPUTECPP_INCLUDE_DIRECTORY}
            ${COMPUTECPP_PLATFORM_SPECIFIC_ARGS}
            ${device_compiler_includes}
            -o ${outputSyclFile}
            -c ${sourceFile}
    DEPENDS ${sourceFile}
    WORKING_DIRECTORY ${binaryDir}
    COMMENT "Building ComputeCpp integration header file ${outputSyclFile}"
    VERBATIM)

  # Add a custom target for the generated integration header
  add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile})

  # Add a dependency on the integration header
  add_dependencies(${targetName} ${targetName}_integration_header)

  # Force inclusion of the integration header for the host compiler
  set(compileFlags -include ${outputSyclFile} "-Wall")
  target_compile_options(${targetName} PUBLIC ${compileFlags})

  # Set the host compiler C++ standard to C++11
  set_property(TARGET ${targetName} PROPERTY CXX_STANDARD 11)

  # Disable GCC dual ABI on GCC 5.1 and higher
  if(COMPUTECPP_DISABLE_GCC_DUAL_ABI)
    set_property(TARGET ${targetName} APPEND PROPERTY COMPILE_DEFINITIONS
      "_GLIBCXX_USE_CXX11_ABI=0")
  endif()

endfunction()
|
|
||||||
|
|
||||||
#######################
#  add_sycl_to_target
#######################
#
#  Attaches SYCL device compilation to an existing target: the integration
#  header is generated via __build_spir and the target is linked against the
#  ComputeCpp runtime and OpenCL.
#
#  targetName : Name of the target to add a SYCL to.
#  sourceFile : Source file to be compiled for SYCL.
#  binaryDir : Intermediate directory to output the integration header.
#
function(add_sycl_to_target targetName sourceFile binaryDir)

  # Generate the integration header and make the target depend on it.
  __build_spir(${targetName} ${sourceFile} ${binaryDir})

  # Link against the ComputeCpp runtime library and the OpenCL libraries.
  target_link_libraries(${targetName}
    PUBLIC
      ${COMPUTECPP_RUNTIME_LIBRARY}
      ${OpenCL_LIBRARIES})

endfunction()
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user