Merge remote-tracking branch 'upstream/master' into bare_hc

Conflicts:
	CMakeLists.txt
This commit is contained in:
Peter Steinbach 2017-02-27 14:35:11 +01:00
commit 62ea5e3ed6
18 changed files with 387 additions and 552 deletions

19
.gitignore vendored
View File

@ -1,18 +1,19 @@
gpu-stream-cuda cuda-stream
gpu-stream-ocl ocl-stream
gpu-stream-acc omp-stream
gpu-stream-omp3 acc-stream
gpu-stream-omp45 raja-stream
gpu-stream-sycl kokkos-stream
sycl-stream
hip-stream
*.o *.o
*.bc
*.sycl
*.tar *.tar
*.gz *.gz
.DS_Store .DS_Store
CMakeCache.txt
CMakeFiles/
cmake_install.cmake
Makefile Makefile

View File

@ -1,238 +0,0 @@
# Project preamble: tool locations, language standard, project declaration,
# generated header, default build type and Cray-specific C++11 flag.
cmake_minimum_required(VERSION 3.2)

# HIP install prefix. An already-defined HIP_PATH wins, then the HIP_PATH
# environment variable, then the default ROCm location.
if(NOT DEFINED HIP_PATH)
  if(NOT DEFINED ENV{HIP_PATH})
    set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
  else()
    set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
  endif()
endif()
# Let find_package() see the CMake modules under the HIP prefix.
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})

# HCC install prefix, resolved the same way as HIP_PATH.
if(NOT DEFINED HCC_PATH)
  if(NOT DEFINED ENV{HCC_PATH})
    set(HCC_PATH "/opt/rocm/" CACHE PATH "Path to which HCC has been installed")
  else()
    set(HCC_PATH $ENV{HCC_PATH} CACHE PATH "Path to which HCC has been installed")
  endif()
endif()

# Default every target to C++11 and refuse to fall back to an older standard.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

project(gpu-stream)

include(CheckIncludeFileCXX)
include(CheckCXXCompilerFlag)

set(gpu-stream_VERSION_MAJOR 2)
set(gpu-stream_VERSION_MINOR 2)

# Generate common.h from its template and make the build tree an include dir
# so the sources can find the generated header.
configure_file(common.h.in common.h)
include_directories(${CMAKE_BINARY_DIR})

# Use 'Release' if no build type specified
if(NOT CMAKE_BUILD_TYPE)
  message("No CMAKE_BUILD_TYPE specified, defaulting to 'Release'")
  set(CMAKE_BUILD_TYPE "Release")
endif()

# If using the Cray compiler, manually add the C++11 flag because setting the
# standard through CMake as above doesn't set this flag with Cray.
# CMAKE_CXX_FLAGS is a space-separated string, not a list: list(APPEND) would
# insert a ';' between existing flags and the new one, so append with a space.
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -hstd=c++11")
endif()
#-------------------------------------------------------------------------------
# HIP
#-------------------------------------------------------------------------------
# Build gpu-stream-hip only when a HIP package is located; otherwise just say
# so and carry on with the remaining programming models.
find_package(HIP QUIET)
if(NOT HIP_FOUND)
  message("Skipping HIP...")
else()
  # hipcc needs the C++11 switch passed explicitly.
  list(APPEND HIP_HIPCC_FLAGS --std=c++11)
  hip_add_executable(gpu-stream-hip main.cpp HIPStream.cu)
  # The HIP macro is defined for the sources of this target.
  target_compile_definitions(gpu-stream-hip PUBLIC HIP)
endif()
#-------------------------------------------------------------------------------
# CUDA
#-------------------------------------------------------------------------------
find_package(CUDA 7.0 QUIET)

# FLAG stays True unless we are on Darwin with Xcode 7.3.1, which this script
# treats as unsupported by CUDA.
set(FLAG True)
if("${CMAKE_SYSTEM_NAME}" MATCHES "Darwin")
  # First line of `xcodebuild -version` carries the Xcode version string.
  execute_process(
    COMMAND xcodebuild -version
    COMMAND head -n 1
    OUTPUT_VARIABLE XCODE_VERSION)
  if("${XCODE_VERSION}" MATCHES "Xcode 7.3.1")
    message("Xcode version not supported by CUDA")
    set(FLAG False)
  endif()
endif()

# Both conditions must hold: a usable host toolchain and a located CUDA toolkit.
if(NOT (FLAG AND CUDA_FOUND))
  message("Skipping CUDA...")
else()
  list(APPEND CUDA_NVCC_FLAGS --std=c++11)
  cuda_add_executable(gpu-stream-cuda main.cpp CUDAStream.cu)
  target_compile_definitions(gpu-stream-cuda PUBLIC CUDA)
endif()
#-------------------------------------------------------------------------------
# OpenCL
#-------------------------------------------------------------------------------
# Build gpu-stream-ocl when an OpenCL installation is located.
find_package(OpenCL QUIET)
if(OpenCL_FOUND)
  add_executable(gpu-stream-ocl main.cpp OCLStream.cpp)
  target_compile_definitions(gpu-stream-ocl PUBLIC OCL)
  # Use the header location reported by FindOpenCL; without this the build only
  # works when the OpenCL headers happen to be on the compiler's default path.
  target_include_directories(gpu-stream-ocl PUBLIC ${OpenCL_INCLUDE_DIRS})
  # Keyword-less signature kept on purpose so it cannot clash with any other
  # keyword-less target_link_libraries() call made on this target.
  target_link_libraries(gpu-stream-ocl ${OpenCL_LIBRARY})
else()
  message("Skipping OpenCL...")
endif()
#-------------------------------------------------------------------------------
# OpenACC
#-------------------------------------------------------------------------------
# Check compiler supports an OpenACC flag. The result variable OPENACC is set
# by whichever compiler-specific probe below is run; for other compilers it is
# left untouched.
include(CheckCXXCompilerFlag)
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
  check_cxx_compiler_flag(-fopenacc OPENACC)
  if(OPENACC)
    # CMAKE_EXE_LINKER_FLAGS is a space-separated string, not a list:
    # list(APPEND) would glue the flag on with a ';' whenever the variable is
    # already non-empty, so append with an explicit space instead.
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fopenacc")
  endif()
elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "PGI")
  check_cxx_compiler_flag(-acc OPENACC)
elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray")
  check_cxx_compiler_flag(-hacc=openacc OPENACC)
endif()

if(OPENACC)
  add_executable(gpu-stream-acc main.cpp ACCStream.cpp)
  target_compile_definitions(gpu-stream-acc PUBLIC ACC)
else()
  message("Skipping OpenACC...")
endif()
#-------------------------------------------------------------------------------
# OpenMP 3.0
#-------------------------------------------------------------------------------
find_package(OpenMP QUIET)
# Accept either spelling of the result variable: CMake releases before 3.3 only
# populate the upper-case OPENMP_FOUND for this module.
if(OpenMP_FOUND OR OPENMP_FOUND)
  add_executable(gpu-stream-omp3 main.cpp OMP3Stream.cpp)
  target_compile_definitions(gpu-stream-omp3 PUBLIC OMP3)

  # Actually enable OpenMP for this target. OpenMP_CXX_FLAGS is a
  # space-separated string, so split it into a list before handing it to the
  # target commands; the same switches are needed at link time to pull in the
  # OpenMP runtime.
  set(omp3_flags "${OpenMP_CXX_FLAGS}")
  separate_arguments(omp3_flags)
  target_compile_options(gpu-stream-omp3 PUBLIC ${omp3_flags})
  target_link_libraries(gpu-stream-omp3 ${omp3_flags})
else()
  message("Skipping OpenMP 3...")
endif()
#-------------------------------------------------------------------------------
# OpenMP 4.5
#-------------------------------------------------------------------------------
# Only attempted with the Cray compiler, and only when its reported version is
# greater than 4.5. Nothing is printed when the target is skipped.
# NOTE(review): 4.5 is compared against the compiler version, not an OpenMP
# specification version - confirm this is the intended gate.
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray"
   AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.5)
  add_executable(gpu-stream-omp45 main.cpp OMP45Stream.cpp)
  target_compile_definitions(gpu-stream-omp45 PUBLIC OMP45)
endif()
#-------------------------------------------------------------------------------
# RAJA
#-------------------------------------------------------------------------------
# Opt-in: only configured when the user supplies RAJA_PATH.
if(NOT RAJA_PATH)
  message("Skipping RAJA... (use -DRAJA_PATH=/path/to/raja to opt in)")
else()
  find_package(OpenMP)
  find_package(CUDA 7.5)

  # nvcc switches for the RAJA build, appended in this order: target
  # architecture, extended lambdas, the host compiler's OpenMP flag(s)
  # forwarded through -Xcompiler, and the USE_RAJA define.
  list(APPEND CUDA_NVCC_FLAGS
    "-arch compute_35"
    --expt-extended-lambda
    -Xcompiler ${OpenMP_CXX_FLAGS}
    -DUSE_RAJA)

  cuda_include_directories(${RAJA_PATH}/include)
  # RAJAStream.cpp is marked so that FindCUDA compiles it as CUDA source even
  # though it does not carry a .cu extension.
  set_source_files_properties(RAJAStream.cpp PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
  cuda_add_executable(gpu-stream-raja main.cpp RAJAStream.cpp)
  target_compile_definitions(gpu-stream-raja PUBLIC USE_RAJA)
  target_link_libraries(gpu-stream-raja "-L${RAJA_PATH}/lib -lRAJA")
endif()
#-------------------------------------------------------------------------------
# Kokkos
#-------------------------------------------------------------------------------
# Opt-in via KOKKOS_PATH, and only on Linux. The build itself is delegated to
# KokkosMakefile through a custom target.
if(NOT KOKKOS_PATH)
  message("Skipping Kokkos... (use -DKOKKOS_PATH=/path/to/kokkos to opt in)")
elseif("${CMAKE_SYSTEM_NAME}" MATCHES "Linux")
  add_custom_target(gpu-stream-kokkos
    COMMAND make -f KokkosMakefile KOKKOS_PATH=${KOKKOS_PATH})
else()
  message("Skipping Kokkos (requires Linux)")
endif()
#-------------------------------------------------------------------------------
# HCC
#-------------------------------------------------------------------------------
# Build gpu-stream-hc with the hcc compiler when it can be found under
# HCC_PATH (or on the default search path).
find_program(HCC_BINARY hcc HINTS ${HCC_PATH}/bin PATHS ${HCC_PATH}/bin)
if(EXISTS "${HCC_BINARY}")
  #can the following be tied to the target only?
  # The previous compiler is remembered here but is not restored anywhere in
  # this section.
  set(OLD_CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
  set(CMAKE_CXX_COMPILER ${HCC_BINARY})

  add_executable(gpu-stream-hc main.cpp HCStream.cpp)
  set_target_properties(gpu-stream-hc PROPERTIES
    COMPILE_FLAGS "-hc -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include"
    LINK_FLAGS "-hc -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive"
  )
  target_compile_definitions(gpu-stream-hc PUBLIC HC)

  # Extra flags for the OpenCL and OpenMP 3 executables. Those targets only
  # exist when their own sections found the required package, so guard each
  # one: set_property(TARGET ...) on a missing target is a configure error.
  # COMPILE_FLAGS/LINK_FLAGS are plain strings, hence APPEND_STRING with a
  # leading space rather than APPEND (which would join with ';').
  if(TARGET gpu-stream-ocl)
    set_property(TARGET gpu-stream-ocl APPEND_STRING PROPERTY COMPILE_FLAGS " -I/opt/rocm/opencl/include/ -stdlib=libstdc++")
    set_property(TARGET gpu-stream-ocl APPEND_STRING PROPERTY LINK_FLAGS " -L/opt/rocm/opencl/lib/x86_64 -lstdc++ ")
  endif()
  if(TARGET gpu-stream-omp3)
    set_property(TARGET gpu-stream-omp3 APPEND_STRING PROPERTY COMPILE_FLAGS " -I/opt/rocm/opencl/include/ -stdlib=libstdc++")
    set_property(TARGET gpu-stream-omp3 APPEND_STRING PROPERTY LINK_FLAGS " -L/opt/rocm/opencl/lib/x86_64 -lstdc++ ")
  endif()
else()
  message("Skipping HC...")
endif()
#-------------------------------------------------------------------------------
# SYCL
#-------------------------------------------------------------------------------
# For Clang and GNU, pick the language level used by the header probe below:
# C++14 when the compiler accepts it, C++11 otherwise.
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang|GNU")
  check_cxx_compiler_flag("-std=c++14" CXX14)
  set(CMAKE_REQUIRED_FLAGS "-std=c++11")
  if(CXX14)
    set(CMAKE_REQUIRED_FLAGS "-std=c++14")
  endif()
endif()

# The SYCL target is only created when the SYCL header can be included.
check_include_file_cxx("CL/sycl.hpp" HAS_SYCL)
if(NOT HAS_SYCL)
  message("Skipping SYCL...")
else()
  add_executable(gpu-stream-sycl main.cpp SYCLStream.cpp)
  target_compile_definitions(gpu-stream-sycl PUBLIC SYCL)

  # The user must define COMPUTECPP_PACKAGE_ROOT_DIR in order to use
  # FindComputeCpp; without it the target is built as plain C++14.
  if(NOT COMPUTECPP_PACKAGE_ROOT_DIR)
    message(STATUS "Using header-only SYCL implementation")
    set_property(TARGET gpu-stream-sycl PROPERTY CXX_STANDARD 14)
  else()
    message(STATUS "Using ComputeCpp for SYCL compilation")
    include(FindComputeCpp)
    include_directories(${COMPUTECPP_INCLUDE_DIRECTORY})
    set(SOURCE_NAME "SYCLStream")
    target_compile_options(gpu-stream-sycl PUBLIC ${HOST_COMPILER_OPTIONS})
    add_sycl_to_target(
      gpu-stream-sycl
      ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_NAME}.cpp
      ${CMAKE_CURRENT_BINARY_DIR})
  endif()
endif()

8
CUDA.make Normal file
View File

@ -0,0 +1,8 @@
# Build the CUDA version of the benchmark with nvcc.
# Additional compiler options can be passed in through EXTRA_FLAGS.
cuda-stream: main.cpp CUDAStream.cu
	nvcc -std=c++11 -O3 -DCUDA main.cpp CUDAStream.cu $(EXTRA_FLAGS) -o cuda-stream

# Delete the built binary.
.PHONY: clean
clean:
	rm -f cuda-stream

18
HIP.make Normal file
View File

@ -0,0 +1,18 @@
# TODO: HIP with HCC

# Compiler driver used for the HIP build.
HIPCC = hipcc

# When CUDA_PATH is not set, nvcc is required at /usr/local/bin/nvcc;
# stop with an explanatory error if it is not there.
ifndef CUDA_PATH
ifeq (,$(wildcard /usr/local/bin/nvcc))
$(error /usr/local/bin/nvcc not found, set CUDA_PATH instead)
endif
endif

# Build the HIP version of the benchmark.
# -O3 is given first so the build is optimised by default (matching the other
# model makefiles) while an optimisation level in CXXFLAGS or EXTRA_FLAGS,
# which come later on the command line, can still override it.
hip-stream: main.cpp HIPStream.cu
	$(HIPCC) -O3 $(CXXFLAGS) -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@

# Delete the built binary.
.PHONY: clean
clean:
	rm -f hip-stream

View File

@ -9,6 +9,7 @@
#include "hip/hip_runtime.h" #include "hip/hip_runtime.h"
#define TBSIZE 1024 #define TBSIZE 1024
#define DOT_NUM_BLOCKS 256
void check_error(void) void check_error(void)
{ {
@ -47,6 +48,9 @@ HIPStream<T>::HIPStream(const unsigned int ARRAY_SIZE, const int device_index)
array_size = ARRAY_SIZE; array_size = ARRAY_SIZE;
// Allocate the host array for partial sums for dot kernels
sums = (T*)malloc(sizeof(T) * DOT_NUM_BLOCKS);
// Check buffers fit on the device // Check buffers fit on the device
hipDeviceProp_t props; hipDeviceProp_t props;
hipGetDeviceProperties(&props, 0); hipGetDeviceProperties(&props, 0);
@ -60,6 +64,8 @@ HIPStream<T>::HIPStream(const unsigned int ARRAY_SIZE, const int device_index)
check_error(); check_error();
hipMalloc(&d_c, ARRAY_SIZE*sizeof(T)); hipMalloc(&d_c, ARRAY_SIZE*sizeof(T));
check_error(); check_error();
hipMalloc(&d_sum, DOT_NUM_BLOCKS*sizeof(T));
check_error();
} }
@ -172,6 +178,49 @@ void HIPStream<T>::triad()
} }
// Dot-product kernel. Every thread accumulates a[i] * b[i] over a grid-stride
// range of indices into its slot of a shared-memory buffer; the block then
// tree-reduces that buffer and thread 0 writes the block's partial result to
// sum[blockIdx.x]. The caller adds up the per-block partial sums.
//
//   lp         - HIP launch parameter object (not referenced in the body)
//   a, b       - input arrays, each of at least array_size elements
//   sum        - output array with one element per block in the grid
//   array_size - number of elements to process
//
// The launch must provide blockDim.x * sizeof(T) bytes of dynamic shared
// memory. The halving reduction only visits every element when blockDim.x is
// a power of two.
template <class T>
__global__ void dot_kernel(hipLaunchParm lp, const T * a, const T * b, T * sum, unsigned int array_size)
{
  // Dynamic shared memory is declared as raw bytes and reinterpreted, so the
  // same extern array can back every instantiation of this template.
  extern __shared__ __align__(sizeof(T)) unsigned char smem[];
  T *tb_sum = reinterpret_cast<T*>(smem);

  const size_t local_i = threadIdx.x;

  // Index arithmetic is done in size_t: a signed int index overflows before
  // reaching the upper range of the unsigned array_size, and mixing it with
  // array_size in the loop condition is a signed/unsigned comparison.
  const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;
  size_t i = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;

  tb_sum[local_i] = 0.0;
  for (; i < array_size; i += stride)
    tb_sum[local_i] += a[i] * b[i];

  // Tree reduction within the block; the barrier at the top of each step
  // makes the previous step's writes visible before they are read.
  for (size_t offset = blockDim.x / 2; offset > 0; offset /= 2)
  {
    __syncthreads();
    if (local_i < offset)
    {
      tb_sum[local_i] += tb_sum[local_i+offset];
    }
  }

  // Slot 0 now holds the block's total.
  if (local_i == 0)
    sum[blockIdx.x] = tb_sum[local_i];
}
// Computes the dot product of the device arrays d_a and d_b.
// Launches dot_kernel on DOT_NUM_BLOCKS blocks of TBSIZE threads (with
// sizeof(T)*TBSIZE bytes of dynamic shared memory), copies the per-block
// partial sums from d_sum into the host buffer `sums`, and returns their
// total accumulated in index order.
template <class T>
T HIPStream<T>::dot()
{
  hipLaunchKernel(HIP_KERNEL_NAME(dot_kernel), dim3(DOT_NUM_BLOCKS), dim3(TBSIZE), sizeof(T)*TBSIZE, 0, d_a, d_b, d_sum, array_size);
  check_error();

  // Bring the DOT_NUM_BLOCKS partial results back to the host.
  hipMemcpy(sums, d_sum, DOT_NUM_BLOCKS*sizeof(T), hipMemcpyDeviceToHost);
  check_error();

  // Finish the reduction on the host, first partial sum to last.
  T total = 0.0;
  const T *partial = sums;
  const T *const last = sums + DOT_NUM_BLOCKS;
  while (partial != last)
  {
    total += *partial;
    ++partial;
  }
  return total;
}
void listDevices(void) void listDevices(void)
{ {
// Get number of devices // Get number of devices

View File

@ -21,10 +21,15 @@ class HIPStream : public Stream<T>
protected: protected:
// Size of arrays // Size of arrays
unsigned int array_size; unsigned int array_size;
// Host array for partial sums for dot kernel
T *sums;
// Device side pointers to arrays // Device side pointers to arrays
T *d_a; T *d_a;
T *d_b; T *d_b;
T *d_c; T *d_c;
T *d_sum;
public: public:
@ -36,6 +41,7 @@ class HIPStream : public Stream<T>
virtual void add() override; virtual void add() override;
virtual void mul() override; virtual void mul() override;
virtual void triad() override; virtual void triad() override;
virtual T dot() override;
virtual void init_arrays(T initA, T initB, T initC) override; virtual void init_arrays(T initA, T initB, T initC) override;
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override; virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;

30
Kokkos.make Normal file
View File

@ -0,0 +1,30 @@
# First rule in the file, so a bare `make -f Kokkos.make` builds the benchmark.
default: kokkos-stream

# Makefile fragment from the Kokkos installation. The KOKKOS_* variables and
# NVCC_WRAPPER used below are not defined in this file; presumably they are
# supplied by this include.
include $(KOKKOS_PATH)/Makefile.kokkos

# Without a TARGET, print the available choices and fall back to CPU.
ifndef TARGET
define target_help
Set TARGET to change to offload device. Defaulting to CPU.
Available targets are:
CPU (default)
GPU
endef
$(info $(target_help))
TARGET=CPU
endif

# CPU builds get a preprocessor define; GPU builds switch the compiler to the
# nvcc wrapper and need no extra define.
ifeq "$(TARGET)" "CPU"
TARGET_DEF = -DKOKKOS_TARGET_CPU
else ifeq "$(TARGET)" "GPU"
CXX = $(NVCC_WRAPPER)
TARGET_DEF =
endif

# Compile and link both sources in a single step.
kokkos-stream: main.cpp KOKKOSStream.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(KOKKOS_LDFLAGS) main.cpp KOKKOSStream.cpp $(KOKKOS_LIBS) -o kokkos-stream -DKOKKOS $(TARGET_DEF) -O3 $(EXTRA_FLAGS)

# Delete the built binary.
.PHONY: clean
clean:
	rm -f kokkos-stream

View File

@ -1,10 +0,0 @@
# CPU build of the Kokkos benchmark: objects are compiled and linked with $(CXX)
# and carry the KOKKOS_TARGET_CPU define.
default: gpu-stream-kokkos

# Makefile fragment from the Kokkos installation (source of the KOKKOS_*
# variables used below, presumably).
include $(KOKKOS_PATH)/Makefile.kokkos

# Link step.
gpu-stream-kokkos: main.o KOKKOSStream.o
	$(CXX) $(KOKKOS_LDFLAGS) main.o KOKKOSStream.o $(KOKKOS_LIBS) -o gpu-stream-kokkos -DKOKKOS -DKOKKOS_TARGET_CPU -O3

# Compile step for each translation unit.
%.o: %.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) -c $< -DKOKKOS -DKOKKOS_TARGET_CPU -O3

View File

@ -1,11 +0,0 @@
# GPU build of the Kokkos benchmark: objects are compiled with the nvcc wrapper
# and linked with $(CXX).
default: gpu-stream-kokkos

# Makefile fragment from the Kokkos installation (source of the KOKKOS_*
# variables and NVCC_WRAPPER used below, presumably).
include $(KOKKOS_PATH)/Makefile.kokkos

# Link step.
gpu-stream-kokkos: main.o KOKKOSStream.o
	$(CXX) $(KOKKOS_LDFLAGS) main.o KOKKOSStream.o $(KOKKOS_LIBS) -o gpu-stream-kokkos -DKOKKOS -O3

# Compile step for each translation unit.
%.o: %.cpp $(KOKKOS_CPP_DEPENDS)
	$(NVCC_WRAPPER) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) -c $< -DKOKKOS -O3

52
OpenACC.make Normal file
View File

@ -0,0 +1,52 @@
# Without a COMPILER, print the choices. No default is assigned: the empty
# name selects COMPILER_ and FLAGS_ below.
ifndef COMPILER
define compiler_help
Set COMPILER to ensure correct flags are set.
Available compilers are:
PGI CRAY
endef
$(info $(compiler_help))
endif

# Compiler executable per COMPILER value (empty value -> $(CXX)).
COMPILER_ = $(CXX)
COMPILER_PGI = pgc++
COMPILER_CRAY = CC

# Base flags per COMPILER value. FLAGS_PGI is extended with the
# target-specific switches further down.
FLAGS_ = -O3 -std=c++11
FLAGS_CRAY = -hstd=c++11
FLAGS_PGI = -std=c++11 -O3 -acc

# PGI needs to be told which device to generate code for, so TARGET is
# mandatory and must be one of the names that has a TARGET_FLAGS_ entry.
ifeq "$(COMPILER)" "PGI"
define target_help
Set a TARGET to ensure PGI targets the correct offload device.
Available targets are:
SNB, IVB, HSW
KEPLER, MAXWELL, PASCAL
HAWAII
endef
ifndef TARGET
$(error $(target_help))
endif
TARGET_FLAGS_SNB = -ta=multicore -tp=sandybridge
TARGET_FLAGS_IVB = -ta=multicore -tp=ivybridge
TARGET_FLAGS_HSW = -ta=multicore -tp=haswell
TARGET_FLAGS_KEPLER = -ta=nvidia:cc35
TARGET_FLAGS_MAXWELL = -ta=nvidia:cc50
TARGET_FLAGS_PASCAL = -ta=nvidia:cc60
TARGET_FLAGS_HAWAII = -ta=radeon:hawaii
# An unrecognised TARGET expands to nothing here and is rejected.
ifeq "$(TARGET_FLAGS_$(TARGET))" ""
$(error $(target_help))
endif
FLAGS_PGI += $(TARGET_FLAGS_$(TARGET))
endif

CXXFLAGS = $(FLAGS_$(COMPILER))

# Compile and link both sources in a single step.
acc-stream: main.cpp ACCStream.cpp
	$(COMPILER_$(COMPILER)) $(CXXFLAGS) -DACC main.cpp ACCStream.cpp $(EXTRA_FLAGS) -o acc-stream

# Delete the binary and any stray object files.
.PHONY: clean
clean:
	rm -f acc-stream main.o ACCStream.o

39
OpenCL.make Normal file
View File

@ -0,0 +1,39 @@
# Without a COMPILER, print the choices and fall back to GNU.
ifndef COMPILER
define compiler_help
Set COMPILER to change flags (defaulting to GNU).
Available compilers are:
GNU CLANG INTEL CRAY
endef
$(info $(compiler_help))
COMPILER=GNU
endif

# Executable and flags for each supported COMPILER value, grouped by compiler.
COMPILER_GNU = g++
FLAGS_GNU = -O3 -std=c++11

COMPILER_CLANG = clang++
FLAGS_CLANG = -O3 -std=c++11

COMPILER_INTEL = icpc
FLAGS_INTEL = -O3 -std=c++11

COMPILER_CRAY = CC
FLAGS_CRAY = -O3 -hstd=c++11

# Flags used when COMPILER expands to nothing.
FLAGS_ = -O3 -std=c++11

# Select the entries that match COMPILER.
CXX = $(COMPILER_$(COMPILER))
CXXFLAGS=$(FLAGS_$(COMPILER))

# OpenCL is linked as a framework on Darwin and as a plain library elsewhere.
PLATFORM = $(shell uname -s)
ifneq "$(PLATFORM)" "Darwin"
LIBS = -lOpenCL
else
LIBS = -framework OpenCL
endif

# Compile and link both sources in a single step.
ocl-stream: main.cpp OCLStream.cpp
	$(CXX) $(CXXFLAGS) -DOCL main.cpp OCLStream.cpp $(EXTRA_FLAGS) $(LIBS) -o ocl-stream

# Delete the built binary.
.PHONY: clean
clean:
	rm -f ocl-stream

60
OpenMP.make Normal file
View File

@ -0,0 +1,60 @@
# Without a COMPILER, print the choices and fall back to GNU.
ifndef COMPILER
define compiler_help
Set COMPILER to change flags (defaulting to GNU).
Available compilers are:
CLANG CRAY GNU INTEL XL
endef
$(info $(compiler_help))
COMPILER=GNU
endif

# Without a TARGET, print the choices and fall back to CPU.
ifndef TARGET
define target_help
Set TARGET to change device (defaulting to CPU).
Available targets are:
CPU NVIDIA
endef
$(info $(target_help))
TARGET=CPU
endif

# Per-compiler settings: executable, base flags, and the OpenMP switches for
# each device that compiler can target. A missing OMP_<COMPILER>_<TARGET>
# entry means the combination is unsupported.
COMPILER_GNU = g++
FLAGS_GNU = -O3 -std=c++11
OMP_GNU_CPU = -fopenmp

COMPILER_INTEL = icpc
FLAGS_INTEL = -O3 -std=c++11 -xHOST
OMP_INTEL_CPU = -qopenmp

COMPILER_CRAY = CC
FLAGS_CRAY = -O3 -hstd=c++11
OMP_CRAY_CPU = -homp
OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU

COMPILER_CLANG = clang++
FLAGS_CLANG = -O3 -std=c++11
OMP_CLANG_CPU = -fopenmp=libomp
OMP_CLANG_NVIDIA = -DOMP_TARGET_GPU -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda

COMPILER_XL = xlc++
FLAGS_XL = -O5 -qarch=pwr8 -qtune=pwr8 -std=c++11
OMP_XL_CPU = -qsmp=omp -qthreaded

# Select the entries that match COMPILER.
CXX = $(COMPILER_$(COMPILER))
CXXFLAGS = $(FLAGS_$(COMPILER))

# Reject compiler/device pairs that have no OpenMP flags defined above.
ifndef OMP_$(COMPILER)_$(TARGET)
$(error Targeting $(TARGET) with $(COMPILER) not supported)
endif
OMP = $(OMP_$(COMPILER)_$(TARGET))

# Compile and link both sources in a single step.
omp-stream: main.cpp OMPStream.cpp
	$(CXX) $(CXXFLAGS) -DOMP main.cpp OMPStream.cpp $(OMP) $(EXTRA_FLAGS) -o omp-stream

# Delete the built binary.
.PHONY: clean
clean:
	rm -f omp-stream

58
RAJA.make Normal file
View File

@ -0,0 +1,58 @@
# Without a TARGET, print the choices and fall back to CPU.
ifndef TARGET
define target_help
Set TARGET to change to offload device. Defaulting to CPU.
Available targets are:
CPU (default)
GPU
endef
$(info $(target_help))
TARGET=CPU
endif

ifeq "$(TARGET)" "CPU"

# CPU build: the host compiler is chosen through COMPILER (default GNU).
ifndef COMPILER
define compiler_help
Set COMPILER to change flags (defaulting to GNU).
Available compilers are:
INTEL GNU CRAY XL
endef
$(info $(compiler_help))
COMPILER=GNU
endif

# Executable and flags for each supported COMPILER value, grouped by compiler.
CXX_INTEL = icpc
CXXFLAGS_INTEL = -O3 -std=c++11 -qopenmp

CXX_GNU = g++
CXXFLAGS_GNU = -O3 -std=c++11 -fopenmp

CXX_CRAY = CC
CXXFLAGS_CRAY = -O3 -hstd=c++11

CXX_XL = xlc++
CXXFLAGS_XL = -O5 -std=c++11 -qarch=pwr8 -qtune=pwr8 -qsmp=omp -qthreaded

CXX = $(CXX_$(COMPILER))
CXXFLAGS = -DRAJA_TARGET_CPU $(CXXFLAGS_$(COMPILER))

else ifeq "$(TARGET)" "GPU"

# GPU build: always nvcc, and the GPU architecture must be given via ARCH.
CXX = nvcc

ifndef ARCH
define arch_help
Set ARCH to ensure correct GPU architecture.
Example:
ARCH=sm_35
endef
$(error $(arch_help))
endif

CXXFLAGS = --expt-extended-lambda -O3 -std=c++11 -x cu -Xcompiler -fopenmp -arch $(ARCH)

endif

# Compile and link both sources against the RAJA installation at RAJA_PATH.
raja-stream: main.cpp RAJAStream.cpp
	$(CXX) $(CXXFLAGS) -DUSE_RAJA -I$(RAJA_PATH)/include main.cpp RAJAStream.cpp $(EXTRA_FLAGS) -L$(RAJA_PATH)/lib -lRAJA -o raja-stream

# Delete the built binary.
.PHONY: clean
clean:
	rm -f raja-stream

36
README.android Normal file
View File

@ -0,0 +1,36 @@
Android (outdated instructions)
------------------
Assuming you have a recent Android NDK available, you can use the
toolchain that it provides to build GPU-STREAM. You should first
use the NDK to generate a standalone toolchain:
# Select a directory to install the toolchain to
ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain
${NDK}/build/tools/make-standalone-toolchain.sh \
--platform=android-14 \
--toolchain=arm-linux-androideabi-4.8 \
--install-dir=${ANDROID_NATIVE_TOOLCHAIN}
Make sure that the OpenCL headers and library (libOpenCL.so) are
available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`.
You should then be able to build GPU-STREAM:
make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++
Copy the executable and OpenCL kernels to the device:
adb push gpu-stream-ocl /data/local/tmp
adb push ocl-stream-kernels.cl /data/local/tmp
Run GPU-STREAM from an adb shell:
adb shell
cd /data/local/tmp
# Use float if device doesn't support double, and reduce array size
./gpu-stream-ocl --float -n 6 -s 10000000

View File

@ -23,45 +23,18 @@ Website
Usage Usage
----- -----
CMake 3.2 or above is required. Drivers, compiler and software applicable to whichever implementation you would like to build against are required.
Drivers, compiler and software applicable to whichever implementation you would like to build against. Our build system is designed to only build implementations in programming models that your system supports.
Generate the Makefile with `cmake .` We have supplied a series of Makefiles, one for each programming model, to assist with building.
The Makefiles contain common build options, and should be simple to customise for your needs too.
Android (outdated instructions) General usage is `make -f <Model>.make`
------------------ Common compiler flags and names can be set by passing a `COMPILER` option to Make, e.g. `make COMPILER=GNU`.
Some models allow specifying a CPU or GPU style target, and this can be set by passing a `TARGET` option to Make, e.g. `make TARGET=GPU`.
Assuming you have a recent Android NDK available, you can use the Pass in extra flags via the `EXTRA_FLAGS` option.
toolchain that it provides to build GPU-STREAM. You should first
use the NDK to generate a standalone toolchain:
# Select a directory to install the toolchain to The binaries are named in the form `<model>-stream`.
ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain
${NDK}/build/tools/make-standalone-toolchain.sh \
--platform=android-14 \
--toolchain=arm-linux-androideabi-4.8 \
--install-dir=${ANDROID_NATIVE_TOOLCHAIN}
Make sure that the OpenCL headers and library (libOpenCL.so) are
available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`.
You should then be able to build GPU-STREAM:
make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++
Copy the executable and OpenCL kernels to the device:
adb push gpu-stream-ocl /data/local/tmp
adb push ocl-stream-kernels.cl /data/local/tmp
Run GPU-STREAM from an adb shell:
adb shell
cd /data/local/tmp
# Use float if device doesn't support double, and reduce array size
./gpu-stream-ocl --float -n 6 -s 10000000
Results Results

12
SYCL.make Normal file
View File

@ -0,0 +1,12 @@
# Device-compiler switches, as reported by the computecpp_info tool.
COMPUTECPP_FLAGS = $(shell computecpp_info --dump-device-compiler-flags)

# Host build: compile and link both sources, force-including the
# SYCLStream.sycl file produced by the rule below.
sycl-stream: main.cpp SYCLStream.cpp SYCLStream.sycl
	$(CXX) -O3 -std=c++11 -DSYCL main.cpp SYCLStream.cpp -include SYCLStream.sycl $(EXTRA_FLAGS) -lComputeCpp -lOpenCL -o sycl-stream

# Device build: run compute++ over the SYCL source.
SYCLStream.sycl: SYCLStream.cpp
	compute++ $< $(COMPUTECPP_FLAGS) -c

# Delete the binary and the files left behind by the device build.
.PHONY: clean
clean:
	rm -f sycl-stream SYCLStream.sycl SYCLStream.bc

View File

@ -1,248 +0,0 @@
#.rst:
# FindComputeCpp
#---------------
#
# Copyright 2016 Codeplay Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use these files except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#########################
# FindComputeCpp.cmake
#########################
#
# Tools for finding and building with ComputeCpp.
#
# User must define COMPUTECPP_PACKAGE_ROOT_DIR pointing to the ComputeCpp
# installation.
#
# Latest version of this file can be found at:
# https://github.com/codeplaysoftware/computecpp-sdk
# Require CMake version 3.2.2 or higher
cmake_minimum_required(VERSION 3.2.2)

# Check that a supported host compiler can be found.
# GCC must be 4.8 or newer and Clang 3.6 or newer (fatal otherwise); any other
# compiler only produces a warning.
if(CMAKE_COMPILER_IS_GNUCXX)
  # Require at least gcc 4.8
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
    message(FATAL_ERROR
      "host compiler - Not found! (gcc version must be at least 4.8)")
  # Require the GCC dual ABI to be disabled for 5.1 or higher.
  # "NOT VERSION_LESS" is used so that 5.1 itself is included (a plain
  # VERSION_GREATER 5.1 skips it) while staying within CMake 3.2 syntax.
  elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
    set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True")
    message(STATUS
      "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)")
  else()
    message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}")
  endif()
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
  # Require at least clang 3.6
  if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6)
    message(FATAL_ERROR
      "host compiler - Not found! (clang version must be at least 3.6)")
  else()
    message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}")
  endif()
else()
  message(WARNING
    "host compiler - Not found! (ComputeCpp supports GCC and Clang, see readme)")
endif()
# User-facing switch (default ON, hidden from the basic cache view) selecting
# 64-bit device code.
set(COMPUTECPP_64_BIT_DEFAULT ON)
option(COMPUTECPP_64_BIT_CODE "Compile device code in 64 bit mode"
  ${COMPUTECPP_64_BIT_DEFAULT})
mark_as_advanced(COMPUTECPP_64_BIT_CODE)

# Find OpenCL package
find_package(OpenCL REQUIRED)

# Find ComputeCpp package: the installation root has to be supplied by the
# user, there is no search for it.
if(NOT COMPUTECPP_PACKAGE_ROOT_DIR)
  message(FATAL_ERROR
    "ComputeCpp package - Not found! (please set COMPUTECPP_PACKAGE_ROOT_DIR)")
else()
  message(STATUS "ComputeCpp package - Found")
endif()
# NOTE(review): option() declares a boolean cache entry, yet this variable
# holds a path - confirm whether a PATH cache variable was intended.
option(COMPUTECPP_PACKAGE_ROOT_DIR "Path to the ComputeCpp Package")
# Locate the pieces of the ComputeCpp installation under
# COMPUTECPP_PACKAGE_ROOT_DIR. Every piece is mandatory: a missing one stops
# configuration with a fatal error.

# Obtain the path to compute++
find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS
  ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin)
if(EXISTS ${COMPUTECPP_DEVICE_COMPILER})
  mark_as_advanced(COMPUTECPP_DEVICE_COMPILER)
  message(STATUS "compute++ - Found")
else()
  message(FATAL_ERROR "compute++ - Not found! (${COMPUTECPP_DEVICE_COMPILER})")
endif()

# Obtain the path to computecpp_info
find_program(COMPUTECPP_INFO_TOOL computecpp_info PATHS
  ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin)
if(EXISTS ${COMPUTECPP_INFO_TOOL})
  # mark_as_advanced() takes the cache variable's name; expanding it here
  # would pass the tool's path instead and leave the real entry unmarked.
  mark_as_advanced(COMPUTECPP_INFO_TOOL)
  message(STATUS "computecpp_info - Found")
else()
  message(FATAL_ERROR "computecpp_info - Not found! (${COMPUTECPP_INFO_TOOL})")
endif()

# Obtain the path to the ComputeCpp runtime library. NO_DEFAULT_PATH restricts
# the search to the ComputeCpp installation.
find_library(COMPUTECPP_RUNTIME_LIBRARY ComputeCpp PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR}
  HINTS ${COMPUTECPP_PACKAGE_ROOT_DIR}/lib PATH_SUFFIXES lib
  DOC "ComputeCpp Runtime Library" NO_DEFAULT_PATH)
if(EXISTS ${COMPUTECPP_RUNTIME_LIBRARY})
  mark_as_advanced(COMPUTECPP_RUNTIME_LIBRARY)
  message(STATUS "libComputeCpp.so - Found")
else()
  message(FATAL_ERROR "libComputeCpp.so - Not found!")
endif()

# Obtain the ComputeCpp include directory
set(COMPUTECPP_INCLUDE_DIRECTORY ${COMPUTECPP_PACKAGE_ROOT_DIR}/include/)
if(NOT EXISTS ${COMPUTECPP_INCLUDE_DIRECTORY})
  message(FATAL_ERROR "ComputeCpp includes - Not found!")
else()
  message(STATUS "ComputeCpp includes - Found")
endif()
# Query computecpp_info three times. Each call captures the tool's output
# (trailing whitespace stripped) and its exit status; a status other than 0
# aborts configuration.

# Obtain the package version
execute_process(
  COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-version"
  OUTPUT_VARIABLE COMPUTECPP_PACKAGE_VERSION
  RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE)
if(COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
  mark_as_advanced(COMPUTECPP_PACKAGE_VERSION)
  message(STATUS "Package version - ${COMPUTECPP_PACKAGE_VERSION}")
else()
  message(FATAL_ERROR "Package version - Error obtaining version!")
endif()

# Obtain the device compiler flags
execute_process(
  COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-device-compiler-flags"
  OUTPUT_VARIABLE COMPUTECPP_DEVICE_COMPILER_FLAGS
  RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE)
if(COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
  # NOTE(review): the name marked here differs from the variable filled in
  # above (COMPUTECPP_DEVICE_COMPILER_FLAGS) - confirm which was intended.
  mark_as_advanced(COMPUTECPP_COMPILER_FLAGS)
  message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}")
else()
  message(FATAL_ERROR "compute++ flags - Error obtaining compute++ flags!")
endif()

# Check if the platform is supported. An unsupported platform is reported but
# does not stop configuration.
execute_process(
  COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported"
  OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED
  RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE)
if(COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
  mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED)
  if(NOT COMPUTECPP_PLATFORM_IS_SUPPORTED)
    message(STATUS "platform - your system CANNOT support ComputeCpp")
  else()
    message(STATUS "platform - your system can support ComputeCpp")
  endif()
else()
  message(FATAL_ERROR "platform - Error checking platform support!")
endif()
####################
#  __build_spir
####################
#
#  Registers a compute++ run that turns one source file into an integration
#  header, and wires that header into an existing target: the target depends
#  on it, force-includes it, is compiled as C++11 with -Wall, and (when
#  COMPUTECPP_DISABLE_GCC_DUAL_ABI is set) with _GLIBCXX_USE_CXX11_ABI=0.
#
#  targetName : Name of the (already created) target.
#  sourceFile : Source file to be compiled.
#  binaryDir : Intermediate directory to output the integration header.
#
function(__build_spir targetName sourceFile binaryDir)

  # The integration header is named after the source file and placed in
  # binaryDir.
  get_filename_component(srcBaseName ${sourceFile} NAME)
  set(outputSyclFile ${binaryDir}/${srcBaseName}.sycl)

  # Forward the directory's include paths, followed by CMAKE_INCLUDE_PATH, to
  # the device compiler. Each entry is pushed onto the front of the list.
  get_property(dirIncludes DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY
    INCLUDE_DIRECTORIES)
  set(includeArgs "")
  foreach(incDir ${dirIncludes})
    set(includeArgs "-I${incDir}" ${includeArgs})
  endforeach()
  if(CMAKE_INCLUDE_PATH)
    foreach(incDir ${CMAKE_INCLUDE_PATH})
      set(includeArgs "-I${incDir}" ${includeArgs})
    endforeach()
  endif()

  # Turn the space-separated flag string into a CMake list so every flag
  # becomes its own command-line argument.
  separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS)

  # Build rule for the integration header; re-run when the source changes.
  add_custom_command(
    OUTPUT ${outputSyclFile}
    COMMAND ${COMPUTECPP_DEVICE_COMPILER}
            ${COMPUTECPP_DEVICE_COMPILER_FLAGS}
            -isystem ${COMPUTECPP_INCLUDE_DIRECTORY}
            ${COMPUTECPP_PLATFORM_SPECIFIC_ARGS}
            ${includeArgs}
            -o ${outputSyclFile}
            -c ${sourceFile}
    DEPENDS ${sourceFile}
    WORKING_DIRECTORY ${binaryDir}
    COMMENT "Building ComputeCpp integration header file ${outputSyclFile}")

  # A named target for the generated header, which the real target then
  # depends on so the header exists before host compilation starts.
  add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile})
  add_dependencies(${targetName} ${targetName}_integration_header)

  # Force inclusion of the integration header for the host compiler.
  target_compile_options(${targetName} PUBLIC -include ${outputSyclFile} "-Wall")

  # Host code is compiled as C++11.
  set_property(TARGET ${targetName} PROPERTY CXX_STANDARD 11)

  # Disable the GCC dual ABI when the host-compiler check requested it.
  if(COMPUTECPP_DISABLE_GCC_DUAL_ABI)
    set_property(TARGET ${targetName} APPEND PROPERTY COMPILE_DEFINITIONS
      "_GLIBCXX_USE_CXX11_ABI=0")
  endif()

endfunction()
#######################
#  add_sycl_to_target
#######################
#
#  Attaches SYCL compilation to an existing target: sets up the compute++ step
#  for the given source file via __build_spir and links the target against
#  the ComputeCpp runtime and OpenCL.
#
#  targetName : Name of the target to add a SYCL to.
#  sourceFile : Source file to be compiled for SYCL.
#  binaryDir : Intermediate directory to output the integration header.
#
function(add_sycl_to_target targetName sourceFile binaryDir)

  # Custom command/target that runs compute++ and generates the integration
  # header for sourceFile.
  __build_spir(${targetName} ${sourceFile} ${binaryDir})

  # ComputeCpp runtime library plus the OpenCL libraries found earlier.
  target_link_libraries(${targetName}
    PUBLIC
      ${COMPUTECPP_RUNTIME_LIBRARY}
      ${OpenCL_LIBRARIES})

endfunction()

View File

@ -15,7 +15,7 @@
#include <iomanip> #include <iomanip>
#include <cstring> #include <cstring>
#define VERSION_STRING "3.0" #define VERSION_STRING "3.1"
#include "Stream.h" #include "Stream.h"