diff --git a/.gitignore b/.gitignore index 6ef20a5..a9748b3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,18 +1,19 @@ -gpu-stream-cuda -gpu-stream-ocl -gpu-stream-acc -gpu-stream-omp3 -gpu-stream-omp45 -gpu-stream-sycl +cuda-stream +ocl-stream +omp-stream +acc-stream +raja-stream +kokkos-stream +sycl-stream +hip-stream *.o +*.bc +*.sycl *.tar *.gz .DS_Store -CMakeCache.txt -CMakeFiles/ -cmake_install.cmake Makefile diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 6f3439e..0000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,198 +0,0 @@ - -cmake_minimum_required(VERSION 3.2) - -if(NOT DEFINED HIP_PATH) - if(NOT DEFINED ENV{HIP_PATH}) - set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") - else() - set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") - endif() -endif() -set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) - - -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -project(gpu-stream) - -include(CheckIncludeFileCXX) -include(CheckCXXCompilerFlag) - -set(gpu-stream_VERSION_MAJOR 2) -set(gpu-stream_VERSION_MINOR 2) - -configure_file(common.h.in common.h) -include_directories(${CMAKE_BINARY_DIR}) - -# Use 'Release' if no build type specified -if (NOT CMAKE_BUILD_TYPE) - message("No CMAKE_BUILD_TYPE specified, defaulting to 'Release'") - set(CMAKE_BUILD_TYPE "Release") -endif() - -# If using the Cray compiler, manually add the C++11 flag because setting the -# standard through CMake as above doesn't set this flag with Cray -if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray") - list(APPEND CMAKE_CXX_FLAGS -hstd=c++11) -endif () - -#------------------------------------------------------------------------------- -# HIP -#------------------------------------------------------------------------------- -find_package(HIP QUIET) -if(${HIP_FOUND}) - list(APPEND HIP_HIPCC_FLAGS --std=c++11) - hip_add_executable(gpu-stream-hip main.cpp HIPStream.cu) - target_compile_definitions(gpu-stream-hip PUBLIC HIP) -else() - message("Skipping HIP...") -endif() - -#------------------------------------------------------------------------------- -# CUDA -#------------------------------------------------------------------------------- -find_package(CUDA 7.0 QUIET) -set(FLAG True) -if ("${CMAKE_SYSTEM_NAME}" MATCHES "Darwin") - execute_process(COMMAND xcodebuild -version COMMAND head -n 1 OUTPUT_VARIABLE XCODE_VERSION) - if ("${XCODE_VERSION}" MATCHES "Xcode 7.3.1") - message("Xcode version not supported by CUDA") - set(FLAG False) - endif () -endif () -if (${FLAG} AND ${CUDA_FOUND}) - list(APPEND CUDA_NVCC_FLAGS --std=c++11) - cuda_add_executable(gpu-stream-cuda main.cpp CUDAStream.cu) - target_compile_definitions(gpu-stream-cuda PUBLIC CUDA) -else () - message("Skipping CUDA...") -endif () - -#------------------------------------------------------------------------------- -# OpenCL -#------------------------------------------------------------------------------- -find_package(OpenCL QUIET) -if (${OpenCL_FOUND}) - add_executable(gpu-stream-ocl main.cpp OCLStream.cpp) - target_compile_definitions(gpu-stream-ocl PUBLIC OCL) - target_link_libraries(gpu-stream-ocl ${OpenCL_LIBRARY}) -else () - message("Skipping OpenCL...") -endif () - -#------------------------------------------------------------------------------- -# OpenACC -#------------------------------------------------------------------------------- -# Check compiler supports an OpenACC flag -include(CheckCXXCompilerFlag) -if ("${CMAKE_CXX_COMPILER_ID}" 
MATCHES "GNU") - CHECK_CXX_COMPILER_FLAG(-fopenacc OPENACC) - if (OPENACC) - list (APPEND CMAKE_EXE_LINKER_FLAGS -fopenacc) - endif () -elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "PGI") - CHECK_CXX_COMPILER_FLAG(-acc OPENACC) -elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray") - CHECK_CXX_COMPILER_FLAG(-hacc=openacc OPENACC) -endif () - -if (OPENACC) - add_executable(gpu-stream-acc main.cpp ACCStream.cpp) - target_compile_definitions(gpu-stream-acc PUBLIC ACC) -else () - message("Skipping OpenACC...") -endif () - -#------------------------------------------------------------------------------- -# OpenMP 3.0 -#------------------------------------------------------------------------------- -find_package(OpenMP QUIET) -if (${OpenMP_FOUND}) - add_executable(gpu-stream-omp3 main.cpp OMP3Stream.cpp) - target_compile_definitions(gpu-stream-omp3 PUBLIC OMP3) -else () - message("Skipping OpenMP 3...") -endif () - -#------------------------------------------------------------------------------- -# OpenMP 4.5 -#------------------------------------------------------------------------------- -if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray") - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.5) - add_executable(gpu-stream-omp45 main.cpp OMP45Stream.cpp) - target_compile_definitions(gpu-stream-omp45 PUBLIC OMP45) - endif () -endif () - -#------------------------------------------------------------------------------- -# RAJA -#------------------------------------------------------------------------------- - -if (RAJA_PATH) - find_package(OpenMP) - find_package(CUDA 7.5) - list(APPEND CUDA_NVCC_FLAGS "-arch compute_35") - list(APPEND CUDA_NVCC_FLAGS --expt-extended-lambda) - list(APPEND CUDA_NVCC_FLAGS -Xcompiler ${OpenMP_CXX_FLAGS}) - list(APPEND CUDA_NVCC_FLAGS -DUSE_RAJA) - cuda_include_directories(${RAJA_PATH}/include) - set_source_files_properties(RAJAStream.cpp PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) - cuda_add_executable(gpu-stream-raja main.cpp RAJAStream.cpp) - target_compile_definitions(gpu-stream-raja PUBLIC USE_RAJA) - target_link_libraries(gpu-stream-raja "-L${RAJA_PATH}/lib -lRAJA") -else() - message("Skipping RAJA... (use -DRAJA_PATH=/path/to/raja to opt in)") -endif() - -#------------------------------------------------------------------------------- -# Kokkos -#------------------------------------------------------------------------------- -if (KOKKOS_PATH) - if ("${CMAKE_SYSTEM_NAME}" MATCHES "Linux") - add_custom_target(gpu-stream-kokkos COMMAND make -f KokkosMakefile KOKKOS_PATH=${KOKKOS_PATH}) - else() - message("Skipping Kokkos (requires Linux)") - endif() -else() - message("Skipping Kokkos... 
(use -DKOKKOS_PATH=/path/to/kokkos to opt in)") -endif() - -#------------------------------------------------------------------------------- -# SYCL -#------------------------------------------------------------------------------- -if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" OR - "${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") - # Use C++14 if available, otherwise drop back to C++11 - check_cxx_compiler_flag("-std=c++14" CXX14) - if (CXX14) - set(CMAKE_REQUIRED_FLAGS "-std=c++14") - else() - set(CMAKE_REQUIRED_FLAGS "-std=c++11") - endif() -endif() - -check_include_file_cxx("CL/sycl.hpp" HAS_SYCL) -if (HAS_SYCL) - add_executable(gpu-stream-sycl main.cpp SYCLStream.cpp) - target_compile_definitions(gpu-stream-sycl PUBLIC SYCL) - - # The user must define this in order to use FindComputeCpp - if (COMPUTECPP_PACKAGE_ROOT_DIR) - message(STATUS "Using ComputeCpp for SYCL compilation") - include(FindComputeCpp) - - include_directories(${COMPUTECPP_INCLUDE_DIRECTORY}) - - set(SOURCE_NAME "SYCLStream") - - target_compile_options(gpu-stream-sycl PUBLIC ${HOST_COMPILER_OPTIONS}) - add_sycl_to_target(gpu-stream-sycl ${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE_NAME}.cpp - ${CMAKE_CURRENT_BINARY_DIR}) - else() - message(STATUS "Using header-only SYCL implementation") - set_property(TARGET gpu-stream-sycl PROPERTY CXX_STANDARD 14) - endif() -else () - message("Skipping SYCL...") -endif (HAS_SYCL) diff --git a/CUDA.make b/CUDA.make new file mode 100644 index 0000000..3edf0f5 --- /dev/null +++ b/CUDA.make @@ -0,0 +1,10 @@ +CXXFLAGS=-O3 +CUDA_CXX=nvcc + +cuda-stream: main.cpp CUDAStream.cu + $(CUDA_CXX) -std=c++11 $(CXXFLAGS) -DCUDA $^ $(EXTRA_FLAGS) -o $@ + +.PHONY: clean +clean: + rm -f cuda-stream + diff --git a/CUDAStream.cu b/CUDAStream.cu index 603b0f0..7c2e6e9 100644 --- a/CUDAStream.cu +++ b/CUDAStream.cu @@ -215,9 +215,7 @@ void CUDAStream::triad() template __global__ void dot_kernel(const T * a, const T * b, T * sum, unsigned int array_size) { - - extern __shared__ __align__(sizeof(T)) unsigned char smem[]; - T *tb_sum = reinterpret_cast(smem); + __shared__ T tb_sum[TBSIZE]; int i = blockDim.x * blockIdx.x + threadIdx.x; const size_t local_i = threadIdx.x; @@ -242,7 +240,7 @@ __global__ void dot_kernel(const T * a, const T * b, T * sum, unsigned int array template T CUDAStream::dot() { - dot_kernel<<>>(d_a, d_b, d_sum, array_size); + dot_kernel<<>>(d_a, d_b, d_sum, array_size); check_error(); #if defined(MANAGED) || defined(PAGEFAULT) diff --git a/HIP.make b/HIP.make new file mode 100644 index 0000000..35b0a6a --- /dev/null +++ b/HIP.make @@ -0,0 +1,12 @@ + +# TODO: HIP with HCC + +HIPCC = hipcc + +hip-stream: main.cpp HIPStream.cpp + $(HIPCC) $(CXXFLAGS) -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@ + +.PHONY: clean +clean: + rm -f hip-stream + diff --git a/HIPStream.cu b/HIPStream.cpp similarity index 80% rename from HIPStream.cu rename to HIPStream.cpp index 8c02348..7bf724a 100644 --- a/HIPStream.cu +++ b/HIPStream.cpp @@ -9,6 +9,7 @@ #include "hip/hip_runtime.h" #define TBSIZE 1024 +#define DOT_NUM_BLOCKS 256 void check_error(void) { @@ -47,6 +48,9 @@ HIPStream::HIPStream(const unsigned int ARRAY_SIZE, const int device_index) array_size = ARRAY_SIZE; + // Allocate the host array for partial sums for dot kernels + sums = (T*)malloc(sizeof(T) * DOT_NUM_BLOCKS); + // Check buffers fit on the device hipDeviceProp_t props; hipGetDeviceProperties(&props, 0); @@ -60,20 +64,27 @@ HIPStream::HIPStream(const unsigned int ARRAY_SIZE, const int device_index) check_error(); hipMalloc(&d_c, ARRAY_SIZE*sizeof(T)); 
check_error(); + hipMalloc(&d_sum, DOT_NUM_BLOCKS*sizeof(T)); + check_error(); } template HIPStream::~HIPStream() { + free(sums); + hipFree(d_a); check_error(); hipFree(d_b); check_error(); hipFree(d_c); check_error(); + hipFree(d_sum); + check_error(); } + template __global__ void init_kernel(hipLaunchParm lp, T * a, T * b, T * c, T initA, T initB, T initC) { @@ -171,6 +182,46 @@ void HIPStream::triad() check_error(); } +template +__global__ void dot_kernel(hipLaunchParm lp, const T * a, const T * b, T * sum, unsigned int array_size) +{ + __shared__ T tb_sum[TBSIZE]; + + int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + const size_t local_i = hipThreadIdx_x; + + tb_sum[local_i] = 0.0; + for (; i < array_size; i += hipBlockDim_x*hipGridDim_x) + tb_sum[local_i] += a[i] * b[i]; + + for (int offset = hipBlockDim_x / 2; offset > 0; offset /= 2) + { + __syncthreads(); + if (local_i < offset) + { + tb_sum[local_i] += tb_sum[local_i+offset]; + } + } + + if (local_i == 0) + sum[hipBlockIdx_x] = tb_sum[local_i]; +} + +template +T HIPStream::dot() +{ + hipLaunchKernel(HIP_KERNEL_NAME(dot_kernel), dim3(DOT_NUM_BLOCKS), dim3(TBSIZE), 0, 0, d_a, d_b, d_sum, array_size); + check_error(); + + hipMemcpy(sums, d_sum, DOT_NUM_BLOCKS*sizeof(T), hipMemcpyDeviceToHost); + check_error(); + + T sum = 0.0; + for (int i = 0; i < DOT_NUM_BLOCKS; i++) + sum += sums[i]; + + return sum; +} void listDevices(void) { diff --git a/HIPStream.h b/HIPStream.h index 392080a..9209388 100644 --- a/HIPStream.h +++ b/HIPStream.h @@ -21,10 +21,15 @@ class HIPStream : public Stream protected: // Size of arrays unsigned int array_size; + + // Host array for partial sums for dot kernel + T *sums; + // Device side pointers to arrays T *d_a; T *d_b; T *d_c; + T *d_sum; public: @@ -36,6 +41,7 @@ class HIPStream : public Stream virtual void add() override; virtual void mul() override; virtual void triad() override; + virtual T dot() override; virtual void init_arrays(T initA, T initB, T initC) override; virtual void read_arrays(std::vector& a, std::vector& b, std::vector& c) override; diff --git a/KOKKOSStream.cpp b/KOKKOSStream.cpp index 9391a13..45f4dff 100644 --- a/KOKKOSStream.cpp +++ b/KOKKOSStream.cpp @@ -39,10 +39,10 @@ void KOKKOSStream::init_arrays(T initA, T initB, T initC) View a(*d_a); View b(*d_b); View c(*d_c); - parallel_for(array_size, KOKKOS_LAMBDA (const int index) + parallel_for(array_size, KOKKOS_LAMBDA (const long index) { a[index] = initA; - b[index] - initB; + b[index] = initB; c[index] = initC; }); Kokkos::fence(); @@ -70,7 +70,7 @@ void KOKKOSStream::copy() View b(*d_b); View c(*d_c); - parallel_for(array_size, KOKKOS_LAMBDA (const int index) + parallel_for(array_size, KOKKOS_LAMBDA (const long index) { c[index] = a[index]; }); @@ -85,7 +85,7 @@ void KOKKOSStream::mul() View c(*d_c); const T scalar = startScalar; - parallel_for(array_size, KOKKOS_LAMBDA (const int index) + parallel_for(array_size, KOKKOS_LAMBDA (const long index) { b[index] = scalar*c[index]; }); @@ -99,7 +99,7 @@ void KOKKOSStream::add() View b(*d_b); View c(*d_c); - parallel_for(array_size, KOKKOS_LAMBDA (const int index) + parallel_for(array_size, KOKKOS_LAMBDA (const long index) { c[index] = a[index] + b[index]; }); @@ -114,7 +114,7 @@ void KOKKOSStream::triad() View c(*d_c); const T scalar = startScalar; - parallel_for(array_size, KOKKOS_LAMBDA (const int index) + parallel_for(array_size, KOKKOS_LAMBDA (const long index) { a[index] = b[index] + scalar*c[index]; }); @@ -129,7 +129,7 @@ T KOKKOSStream::dot() T sum = 0.0; - 
parallel_reduce(array_size, KOKKOS_LAMBDA (const int index, double &tmp) + parallel_reduce(array_size, KOKKOS_LAMBDA (const long index, double &tmp) { tmp += a[index] * b[index]; }, sum); diff --git a/Kokkos.make b/Kokkos.make new file mode 100644 index 0000000..1c6207d --- /dev/null +++ b/Kokkos.make @@ -0,0 +1,45 @@ + +default: kokkos-stream + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifndef COMPILER +define compiler_help +Set COMPILER to change flags (defaulting to GNU). +Available compilers are: + GNU INTEL + +endef +$(info $(compiler_help)) +COMPILER=GNU +endif + +COMPILER_GNU = g++ +COMPILER_INTEL = icpc -qopt-streaming-stores=always +CXX = $(COMPILER_$(COMPILER)) + +ifndef TARGET +define target_help +Set TARGET to change to offload device. Defaulting to CPU. +Available targets are: + CPU (default) + GPU +endef +$(info $(target_help)) +TARGET=CPU +endif + +ifeq ($(TARGET), CPU) +TARGET_DEF = -DKOKKOS_TARGET_CPU +else ifeq ($(TARGET), GPU) +CXX = $(NVCC_WRAPPER) +TARGET_DEF = +endif + +kokkos-stream: main.cpp KOKKOSStream.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(KOKKOS_LDFLAGS) main.cpp KOKKOSStream.cpp $(KOKKOS_LIBS) -o $@ -DKOKKOS $(TARGET_DEF) -O3 $(EXTRA_FLAGS) + +.PHONY: clean +clean: + rm -f kokkos-stream + diff --git a/KokkosCPUMakefile b/KokkosCPUMakefile deleted file mode 100644 index caa8b77..0000000 --- a/KokkosCPUMakefile +++ /dev/null @@ -1,10 +0,0 @@ - -default: gpu-stream-kokkos - -include $(KOKKOS_PATH)/Makefile.kokkos - -gpu-stream-kokkos: main.o KOKKOSStream.o - $(CXX) $(KOKKOS_LDFLAGS) $^ $(KOKKOS_LIBS) -o $@ -DKOKKOS -DKOKKOS_TARGET_CPU -O3 - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) -c $< -DKOKKOS -DKOKKOS_TARGET_CPU -O3 diff --git a/KokkosMakefile b/KokkosMakefile deleted file mode 100644 index 1104cf4..0000000 --- a/KokkosMakefile +++ /dev/null @@ -1,11 +0,0 @@ - -default: gpu-stream-kokkos - -include $(KOKKOS_PATH)/Makefile.kokkos - -gpu-stream-kokkos: main.o KOKKOSStream.o - $(CXX) $(KOKKOS_LDFLAGS) $^ $(KOKKOS_LIBS) -o $@ -DKOKKOS -O3 - -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) - $(NVCC_WRAPPER) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) -c $< -DKOKKOS -O3 - diff --git a/LICENSE b/LICENSE index 1bc1114..1835c1a 100644 --- a/LICENSE +++ b/LICENSE @@ -12,22 +12,22 @@ * 3. You are free to publish results obtained from running this * program, or from works that you derive from this program, * with the following limitations: -* 3a. In order to be referred to as "GPU-STREAM benchmark results", -* published results must be in conformance to the GPU-STREAM +* 3a. In order to be referred to as "BabelStream benchmark results", +* published results must be in conformance to the BabelStream * Run Rules published at -* http://github.com/UoB-HPC/GPU-STREAM/wiki/Run-Rules +* http://github.com/UoB-HPC/BabelStream/wiki/Run-Rules * and incorporated herein by reference. * The copyright holders retain the * right to determine conformity with the Run Rules. * 3b. Results based on modified source code or on runs not in -* accordance with the GPU-STREAM Run Rules must be clearly +* accordance with the BabelStream Run Rules must be clearly * labelled whenever they are published. Examples of * proper labelling include: -* "tuned GPU-STREAM benchmark results" -* "based on a variant of the GPU-STREAM benchmark code" +* "tuned BabelStream benchmark results" +* "based on a variant of the BabelStream benchmark code" * Other comparable, clear and reasonable labelling is * acceptable. -* 3c. 
Submission of results to the GPU-STREAM benchmark web site +* 3c. Submission of results to the BabelStream benchmark web site * is encouraged, but not required. * 4. Use of this program or creation of derived works based on this * program constitutes acceptance of these licensing restrictions. diff --git a/OMPStream.cpp b/OMPStream.cpp index 189cacb..06346e7 100644 --- a/OMPStream.cpp +++ b/OMPStream.cpp @@ -7,6 +7,10 @@ #include "OMPStream.h" +#ifndef ALIGNMENT +#define ALIGNMENT (2*1024*1024) // 2MB +#endif + template OMPStream::OMPStream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int device) { @@ -22,9 +26,9 @@ OMPStream::OMPStream(const unsigned int ARRAY_SIZE, T *a, T *b, T *c, int dev {} #else // Allocate on the host - this->a = (T*)malloc(sizeof(T)*array_size); - this->b = (T*)malloc(sizeof(T)*array_size); - this->c = (T*)malloc(sizeof(T)*array_size); + this->a = (T*)aligned_alloc(ALIGNMENT, sizeof(T)*array_size); + this->b = (T*)aligned_alloc(ALIGNMENT, sizeof(T)*array_size); + this->c = (T*)aligned_alloc(ALIGNMENT, sizeof(T)*array_size); #endif } diff --git a/OpenACC.make b/OpenACC.make new file mode 100644 index 0000000..cc61f8e --- /dev/null +++ b/OpenACC.make @@ -0,0 +1,52 @@ + +ifndef COMPILER +define compiler_help +Set COMPILER to ensure correct flags are set. +Available compilers are: + PGI CRAY +endef +$(info $(compiler_help)) +endif + +COMPILER_ = $(CXX) +COMPILER_PGI = pgc++ +COMPILER_CRAY = CC + +FLAGS_ = -O3 -std=c++11 + +FLAGS_PGI = -std=c++11 -O3 -acc +ifeq ($(COMPILER), PGI) +define target_help +Set a TARGET to ensure PGI targets the correct offload device. +Available targets are: + SNB, IVB, HSW + KEPLER, MAXWELL, PASCAL + HAWAII +endef +ifndef TARGET +$(error $(target_help)) +endif +TARGET_FLAGS_SNB = -ta=multicore -tp=sandybridge +TARGET_FLAGS_IVB = -ta=multicore -tp=ivybridge +TARGET_FLAGS_HSW = -ta=multicore -tp=haswell +TARGET_FLAGS_KEPLER = -ta=nvidia:cc35 +TARGET_FLAGS_MAXWELL = -ta=nvidia:cc50 +TARGET_FLAGS_PASCAL = -ta=nvidia:cc60 +TARGET_FLAGS_HAWAII = -ta=radeon:hawaii +ifeq ($(TARGET_FLAGS_$(TARGET)),) +$(error $(target_help)) +endif + +FLAGS_PGI += $(TARGET_FLAGS_$(TARGET)) + +endif + +FLAGS_CRAY = -hstd=c++11 +CXXFLAGS = $(FLAGS_$(COMPILER)) + +acc-stream: main.cpp ACCStream.cpp + $(COMPILER_$(COMPILER)) $(CXXFLAGS) -DACC $^ $(EXTRA_FLAGS) -o $@ + +.PHONY: clean +clean: + rm -f acc-stream main.o ACCStream.o diff --git a/OpenCL.make b/OpenCL.make new file mode 100644 index 0000000..8ad7108 --- /dev/null +++ b/OpenCL.make @@ -0,0 +1,39 @@ + +ifndef COMPILER +define compiler_help +Set COMPILER to change flags (defaulting to GNU). +Available compilers are: + GNU CLANG INTEL CRAY + +endef +$(info $(compiler_help)) +COMPILER=GNU +endif + +COMPILER_GNU = g++ +COMPILER_CLANG = clang++ +COMPILER_INTEL = icpc +COMPILER_CRAY = CC +CXX = $(COMPILER_$(COMPILER)) + +FLAGS_ = -O3 -std=c++11 +FLAGS_GNU = -O3 -std=c++11 +FLAGS_CLANG = -O3 -std=c++11 +FLAGS_INTEL = -O3 -std=c++11 +FLAGS_CRAY = -O3 -hstd=c++11 +CXXFLAGS=$(FLAGS_$(COMPILER)) + +PLATFORM = $(shell uname -s) +ifeq ($(PLATFORM), Darwin) + LIBS = -framework OpenCL +else + LIBS = -lOpenCL +endif + +ocl-stream: main.cpp OCLStream.cpp + $(CXX) $(CXXFLAGS) -DOCL $^ $(EXTRA_FLAGS) $(LIBS) -o $@ + +.PHONY: clean +clean: + rm -f ocl-stream + diff --git a/OpenMP.make b/OpenMP.make new file mode 100644 index 0000000..2028c46 --- /dev/null +++ b/OpenMP.make @@ -0,0 +1,60 @@ + +ifndef COMPILER +define compiler_help +Set COMPILER to change flags (defaulting to GNU). 
+Available compilers are: + CLANG CRAY GNU INTEL XL + +endef +$(info $(compiler_help)) +COMPILER=GNU +endif + +ifndef TARGET +define target_help +Set TARGET to change device (defaulting to CPU). +Available targets are: + CPU NVIDIA + +endef +$(info $(target_help)) +TARGET=CPU +endif + +COMPILER_GNU = g++ +COMPILER_INTEL = icpc +COMPILER_CRAY = CC +COMPILER_CLANG = clang++ +COMPILER_XL = xlc++ +CXX = $(COMPILER_$(COMPILER)) + +FLAGS_GNU = -O3 -std=c++11 +FLAGS_INTEL = -O3 -std=c++11 -xHOST -qopt-streaming-stores=always +FLAGS_CRAY = -O3 -hstd=c++11 +FLAGS_CLANG = -O3 -std=c++11 +FLAGS_XL = -O5 -qarch=pwr8 -qtune=pwr8 -std=c++11 +CXXFLAGS = $(FLAGS_$(COMPILER)) + +# OpenMP flags for CPUs +OMP_GNU_CPU = -fopenmp +OMP_INTEL_CPU = -qopenmp +OMP_CRAY_CPU = -homp +OMP_CLANG_CPU = -fopenmp=libomp +OMP_XL_CPU = -qsmp=omp -qthreaded + +# OpenMP flags for NVIDIA +OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU +OMP_CLANG_NVIDIA = -DOMP_TARGET_GPU -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda + +ifndef OMP_$(COMPILER)_$(TARGET) +$(error Targeting $(TARGET) with $(COMPILER) not supported) +endif + +OMP = $(OMP_$(COMPILER)_$(TARGET)) + +omp-stream: main.cpp OMPStream.cpp + $(CXX) $(CXXFLAGS) -DOMP $^ $(OMP) $(EXTRA_FLAGS) -o $@ + +.PHONY: clean +clean: + rm -f omp-stream diff --git a/RAJA.make b/RAJA.make new file mode 100644 index 0000000..47aeefb --- /dev/null +++ b/RAJA.make @@ -0,0 +1,58 @@ + +ifndef TARGET +define target_help +Set TARGET to change to offload device. Defaulting to CPU. +Available targets are: + CPU (default) + GPU +endef +$(info $(target_help)) +TARGET=CPU +endif + +ifeq ($(TARGET), CPU) + +ifndef COMPILER +define compiler_help +Set COMPILER to change flags (defaulting to GNU). +Available compilers are: + INTEL GNU CRAY XL +endef +$(info $(compiler_help)) +COMPILER=GNU +endif + +CXX_INTEL = icpc +CXX_GNU = g++ +CXX_CRAY = CC +CXX_XL = xlc++ + +CXXFLAGS_INTEL = -O3 -std=c++11 -qopenmp -xHost -qopt-streaming-stores=always +CXXFLAGS_GNU = -O3 -std=c++11 -fopenmp +CXXFLAGS_CRAY = -O3 -hstd=c++11 +CXXFLAGS_XL = -O5 -std=c++11 -qarch=pwr8 -qtune=pwr8 -qsmp=omp -qthreaded + +CXX = $(CXX_$(COMPILER)) +CXXFLAGS = -DRAJA_TARGET_CPU $(CXXFLAGS_$(COMPILER)) + +else ifeq ($(TARGET), GPU) +CXX = nvcc + +ifndef ARCH +define arch_help +Set ARCH to ensure correct GPU architecture. 
+Example: + ARCH=sm_35 +endef +$(error $(arch_help)) +endif +CXXFLAGS = --expt-extended-lambda -O3 -std=c++11 -x cu -Xcompiler -fopenmp -arch $(ARCH) +endif + +raja-stream: main.cpp RAJAStream.cpp + $(CXX) $(CXXFLAGS) -DUSE_RAJA -I$(RAJA_PATH)/include $^ $(EXTRA_FLAGS) -L$(RAJA_PATH)/lib -lRAJA -o $@ + +.PHONY: clean +clean: + rm -f raja-stream + diff --git a/RAJAStream.cpp b/RAJAStream.cpp index 240f160..395a6ee 100644 --- a/RAJAStream.cpp +++ b/RAJAStream.cpp @@ -10,6 +10,10 @@ using RAJA::forall; using RAJA::RangeSegment; +#ifndef ALIGNMENT +#define ALIGNMENT (2*1024*1024) // 2MB +#endif + template RAJAStream::RAJAStream(const unsigned int ARRAY_SIZE, const int device_index) : array_size(ARRAY_SIZE) @@ -18,9 +22,9 @@ RAJAStream::RAJAStream(const unsigned int ARRAY_SIZE, const int device_index) index_set.push_back(seg); #ifdef RAJA_TARGET_CPU - d_a = new T[ARRAY_SIZE]; - d_b = new T[ARRAY_SIZE]; - d_c = new T[ARRAY_SIZE]; + d_a = (T*)aligned_alloc(ALIGNMENT, sizeof(T)*array_size); + d_b = (T*)aligned_alloc(ALIGNMENT, sizeof(T)*array_size); + d_c = (T*)aligned_alloc(ALIGNMENT, sizeof(T)*array_size); #else cudaMallocManaged((void**)&d_a, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal); cudaMallocManaged((void**)&d_b, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal); @@ -33,9 +37,9 @@ template RAJAStream::~RAJAStream() { #ifdef RAJA_TARGET_CPU - delete[] d_a; - delete[] d_b; - delete[] d_c; + free(d_a); + free(d_b); + free(d_c); #else cudaFree(d_a); cudaFree(d_b); @@ -46,10 +50,10 @@ RAJAStream::~RAJAStream() template void RAJAStream::init_arrays(T initA, T initB, T initC) { - T* a = d_a; - T* b = d_b; - T* c = d_c; - forall(index_set, [=] RAJA_DEVICE (int index) + T* RAJA_RESTRICT a = d_a; + T* RAJA_RESTRICT b = d_b; + T* RAJA_RESTRICT c = d_c; + forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) { a[index] = initA; b[index] = initB; @@ -69,9 +73,9 @@ void RAJAStream::read_arrays( template void RAJAStream::copy() { - T* a = d_a; - T* c = d_c; - forall(index_set, [=] RAJA_DEVICE (int index) + T* RAJA_RESTRICT a = d_a; + T* RAJA_RESTRICT c = d_c; + forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) { c[index] = a[index]; }); @@ -80,10 +84,10 @@ void RAJAStream::copy() template void RAJAStream::mul() { - T* b = d_b; - T* c = d_c; + T* RAJA_RESTRICT b = d_b; + T* RAJA_RESTRICT c = d_c; const T scalar = startScalar; - forall(index_set, [=] RAJA_DEVICE (int index) + forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) { b[index] = scalar*c[index]; }); @@ -92,10 +96,10 @@ void RAJAStream::mul() template void RAJAStream::add() { - T* a = d_a; - T* b = d_b; - T* c = d_c; - forall(index_set, [=] RAJA_DEVICE (int index) + T* RAJA_RESTRICT a = d_a; + T* RAJA_RESTRICT b = d_b; + T* RAJA_RESTRICT c = d_c; + forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) { c[index] = a[index] + b[index]; }); @@ -104,11 +108,11 @@ void RAJAStream::add() template void RAJAStream::triad() { - T* a = d_a; - T* b = d_b; - T* c = d_c; + T* RAJA_RESTRICT a = d_a; + T* RAJA_RESTRICT b = d_b; + T* RAJA_RESTRICT c = d_c; const T scalar = startScalar; - forall(index_set, [=] RAJA_DEVICE (int index) + forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) { a[index] = b[index] + scalar*c[index]; }); @@ -117,12 +121,12 @@ void RAJAStream::triad() template T RAJAStream::dot() { - T* a = d_a; - T* b = d_b; + T* RAJA_RESTRICT a = d_a; + T* RAJA_RESTRICT b = d_b; RAJA::ReduceSum sum(0.0); - forall(index_set, [=] RAJA_DEVICE (int index) + forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) { sum += 
a[index] * b[index]; }); diff --git a/README.android b/README.android new file mode 100644 index 0000000..edc4a52 --- /dev/null +++ b/README.android @@ -0,0 +1,36 @@ +Android (outdated instructions) +------------------ + +Assuming you have a recent Android NDK available, you can use the +toolchain that it provides to build GPU-STREAM. You should first +use the NDK to generate a standalone toolchain: + + # Select a directory to install the toolchain to + ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain + + ${NDK}/build/tools/make-standalone-toolchain.sh \ + --platform=android-14 \ + --toolchain=arm-linux-androideabi-4.8 \ + --install-dir=${ANDROID_NATIVE_TOOLCHAIN} + +Make sure that the OpenCL headers and library (libOpenCL.so) are +available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`. + +You should then be able to build GPU-STREAM: + + make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++ + +Copy the executable and OpenCL kernels to the device: + + adb push gpu-stream-ocl /data/local/tmp + adb push ocl-stream-kernels.cl /data/local/tmp + +Run GPU-STREAM from an adb shell: + + adb shell + cd /data/local/tmp + + # Use float if device doesn't support double, and reduce array size + ./gpu-stream-ocl --float -n 6 -s 10000000 + + diff --git a/README.md b/README.md index b024915..0f93be6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -GPU-STREAM +BabelStream ========== Measure memory transfer rates to/from global device memory on GPUs. @@ -16,53 +16,51 @@ Currently implemented are: - RAJA - SYCL +This code was previously called GPU-STREAM. + Website ------- -[uob-hpc.github.io/GPU-STREAM/](uob-hpc.github.io/GPU-STREAM/) +[uob-hpc.github.io/BabelStream/](https://uob-hpc.github.io/BabelStream/) Usage ----- -CMake 3.2 or above is required. -Drivers, compiler and software applicable to whichever implementation you would like to build against. Our build system is designed to only build implementations in programming models that your system supports. +Drivers, compiler and software applicable to whichever implementation you would like to build against is required. -Generate the Makefile with `cmake .` +We have supplied a series of Makefiles, one for each programming model, to assist with building. +The Makefiles contain common build options, and should be simple to customise for your needs too. -Android (outdated instructions) ------------------- +General usage is `make -f .make` +Common compiler flags and names can be set by passing a `COMPILER` option to Make, e.g. `make COMPILER=GNU`. +Some models allow specifying a CPU or GPU style target, and this can be set by passing a `TARGET` option to Make, e.g. `make TARGET=GPU`. -Assuming you have a recent Android NDK available, you can use the -toolchain that it provides to build GPU-STREAM. You should first -use the NDK to generate a standalone toolchain: +Pass in extra flags via the `EXTRA_FLAGS` option. - # Select a directory to install the toolchain to - ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain +The binaries are named in the form `-stream`. - ${NDK}/build/tools/make-standalone-toolchain.sh \ - --platform=android-14 \ - --toolchain=arm-linux-androideabi-4.8 \ - --install-dir=${ANDROID_NATIVE_TOOLCHAIN} +Building Kokkos +--------------- -Make sure that the OpenCL headers and library (libOpenCL.so) are -available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`. +We use the following command to build Kokkos using the Intel Compiler, specifying the `arch` appropriately, e.g. `KNL`. 
+``` +../generate_makefile.bash --prefix= --with-openmp --with-pthread --arch= --compiler=icpc --cxxflags=-DKOKKOS_MEMORY_ALIGNMENT=2097152 +``` +For building with CUDA support, we use the following command, specifying the `arch` appropriately, e.g. `Kepler35`. +``` +../generate_makefile.bash --prefix= --with-cuda --with-openmp --with-pthread --arch= --with-cuda-options=enable_lambda +``` -You should then be able to build GPU-STREAM: - - make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++ - -Copy the executable and OpenCL kernels to the device: - - adb push gpu-stream-ocl /data/local/tmp - adb push ocl-stream-kernels.cl /data/local/tmp - -Run GPU-STREAM from an adb shell: - - adb shell - cd /data/local/tmp - - # Use float if device doesn't support double, and reduce array size - ./gpu-stream-ocl --float -n 6 -s 10000000 +Building RAJA +------------- +We use the following command to build RAJA using the Intel Compiler. +``` +cmake .. -DCMAKE_INSTALL_PREFIX= -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DRAJA_PTR="RAJA_USE_RESTRICT_ALIGNED_PTR" -DCMAKE_BUILD_TYPE=ICCBuild -DRAJA_ENABLE_TESTS=Off +``` +For building with CUDA support, we use the following command. +``` +cmake .. -DCMAKE_INSTALL_PREFIX= -DRAJA_PTR="RAJA_USE_RESTRICT_ALIGNED_PTR" -DRAJA_ENABLE_CUDA=1 -DRAJA_ENABLE_TESTS=Off +``` Results ------- @@ -72,13 +70,17 @@ Sample results can be found in the `results` subdirectory. If you would like to Citing ------ -You can view the [Poster and Extended Abstract](http://sc15.supercomputing.org/sites/all/themes/SC15images/tech_poster/tech_poster_pages/post150.html) on GPU-STREAM presented at SC'15. Please cite GPU-STREAM via this reference: +Please cite BabelStream via this reference: > Deakin T, Price J, Martineau M, McIntosh-Smith S. GPU-STREAM v2.0: Benchmarking the achievable memory bandwidth of many-core processors across diverse parallel programming models. 2016. Paper presented at P^3MA Workshop at ISC High Performance, Frankfurt, Germany. -**Other GPU-STREAM publications:** +**Other BabelStream publications:** > Deakin T, McIntosh-Smith S. GPU-STREAM: Benchmarking the achievable memory bandwidth of Graphics Processing Units. 2015. Poster session presented at IEEE/ACM SuperComputing, Austin, United States. +You can view the [Poster and Extended Abstract](http://sc15.supercomputing.org/sites/all/themes/SC15images/tech_poster/tech_poster_pages/post150.html). + +> Deakin T, Price J, Martineau M, McIntosh-Smith S. GPU-STREAM: Now in 2D!. 2016. Poster session presented at IEEE/ACM SuperComputing, Salt Lake City, United States. +You can view the [Poster and Extended Abstract](http://sc16.supercomputing.org/sc-archive/tech_poster/tech_poster_pages/post139.html). 
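The README.md hunk above documents the new per-model Makefile workflow (`COMPILER`, `TARGET`, `EXTRA_FLAGS`, binaries named `<model>-stream`). A minimal sketch of how those options combine, using only model and variable names that appear in the Makefiles in this diff — the specific values (GNU, CPU, sm_60) are illustrative assumptions, not requirements:

```sh
# Build the OpenMP version for the CPU with GCC (the defaults, shown explicitly)
make -f OpenMP.make COMPILER=GNU TARGET=CPU        # produces ./omp-stream

# Build the CUDA version, passing an architecture flag through EXTRA_FLAGS
# (sm_60 is only an example; match it to your GPU)
make -f CUDA.make EXTRA_FLAGS="-arch=sm_60"        # produces ./cuda-stream
```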
diff --git a/SYCL.make b/SYCL.make new file mode 100644 index 0000000..1e1c480 --- /dev/null +++ b/SYCL.make @@ -0,0 +1,12 @@ + +COMPUTECPP_FLAGS = $(shell computecpp_info --dump-device-compiler-flags) + +sycl-stream: main.cpp SYCLStream.cpp SYCLStream.sycl + $(CXX) -O3 -std=c++11 -DSYCL main.cpp SYCLStream.cpp -include SYCLStream.sycl $(EXTRA_FLAGS) -lComputeCpp -lOpenCL -o $@ + +SYCLStream.sycl: SYCLStream.cpp + compute++ SYCLStream.cpp $(COMPUTECPP_FLAGS) -c + +.PHONY: clean +clean: + rm -f sycl-stream SYCLStream.sycl SYCLStream.bc diff --git a/cmake/Modules/FindComputeCpp.cmake b/cmake/Modules/FindComputeCpp.cmake deleted file mode 100644 index 5e08902..0000000 --- a/cmake/Modules/FindComputeCpp.cmake +++ /dev/null @@ -1,248 +0,0 @@ -#.rst: -# FindComputeCpp -#--------------- -# -# Copyright 2016 Codeplay Software Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use these files except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -######################### -# FindComputeCpp.cmake -######################### -# -# Tools for finding and building with ComputeCpp. -# -# User must define COMPUTECPP_PACKAGE_ROOT_DIR pointing to the ComputeCpp -# installation. -# -# Latest version of this file can be found at: -# https://github.com/codeplaysoftware/computecpp-sdk - -# Require CMake version 3.2.2 or higher -cmake_minimum_required(VERSION 3.2.2) - -# Check that a supported host compiler can be found -if(CMAKE_COMPILER_IS_GNUCXX) - # Require at least gcc 4.8 - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) - message(FATAL_ERROR - "host compiler - Not found! (gcc version must be at least 4.8)") - # Require the GCC dual ABI to be disabled for 5.1 or higher - elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1) - set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True") - message(STATUS - "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)") - else() - message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}") - endif() -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - # Require at least clang 3.6 - if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6) - message(FATAL_ERROR - "host compiler - Not found! (clang version must be at least 3.6)") - else() - message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}") - endif() -else() - message(WARNING - "host compiler - Not found! (ComputeCpp supports GCC and Clang, see readme)") -endif() - -set(COMPUTECPP_64_BIT_DEFAULT ON) -option(COMPUTECPP_64_BIT_CODE "Compile device code in 64 bit mode" - ${COMPUTECPP_64_BIT_DEFAULT}) -mark_as_advanced(COMPUTECPP_64_BIT_CODE) - -# Find OpenCL package -find_package(OpenCL REQUIRED) - -# Find ComputeCpp packagee -if(NOT COMPUTECPP_PACKAGE_ROOT_DIR) - message(FATAL_ERROR - "ComputeCpp package - Not found! 
(please set COMPUTECPP_PACKAGE_ROOT_DIR") -else() - message(STATUS "ComputeCpp package - Found") -endif() -option(COMPUTECPP_PACKAGE_ROOT_DIR "Path to the ComputeCpp Package") - -# Obtain the path to compute++ -find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS - ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin) -if (EXISTS ${COMPUTECPP_DEVICE_COMPILER}) - mark_as_advanced(COMPUTECPP_DEVICE_COMPILER) - message(STATUS "compute++ - Found") -else() - message(FATAL_ERROR "compute++ - Not found! (${COMPUTECPP_DEVICE_COMPILER})") -endif() - -# Obtain the path to computecpp_info -find_program(COMPUTECPP_INFO_TOOL computecpp_info PATHS - ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin) -if (EXISTS ${COMPUTECPP_INFO_TOOL}) - mark_as_advanced(${COMPUTECPP_INFO_TOOL}) - message(STATUS "computecpp_info - Found") -else() - message(FATAL_ERROR "computecpp_info - Not found! (${COMPUTECPP_INFO_TOOL})") -endif() - -# Obtain the path to the ComputeCpp runtime library -find_library(COMPUTECPP_RUNTIME_LIBRARY ComputeCpp PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR} - HINTS ${COMPUTECPP_PACKAGE_ROOT_DIR}/lib PATH_SUFFIXES lib - DOC "ComputeCpp Runtime Library" NO_DEFAULT_PATH) - -if (EXISTS ${COMPUTECPP_RUNTIME_LIBRARY}) - mark_as_advanced(COMPUTECPP_RUNTIME_LIBRARY) - message(STATUS "libComputeCpp.so - Found") -else() - message(FATAL_ERROR "libComputeCpp.so - Not found!") -endif() - -# Obtain the ComputeCpp include directory -set(COMPUTECPP_INCLUDE_DIRECTORY ${COMPUTECPP_PACKAGE_ROOT_DIR}/include/) -if (NOT EXISTS ${COMPUTECPP_INCLUDE_DIRECTORY}) - message(FATAL_ERROR "ComputeCpp includes - Not found!") -else() - message(STATUS "ComputeCpp includes - Found") -endif() - -# Obtain the package version -execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-version" - OUTPUT_VARIABLE COMPUTECPP_PACKAGE_VERSION - RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) -if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") - message(FATAL_ERROR "Package version - Error obtaining version!") -else() - mark_as_advanced(COMPUTECPP_PACKAGE_VERSION) - message(STATUS "Package version - ${COMPUTECPP_PACKAGE_VERSION}") -endif() - -# Obtain the device compiler flags -execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-device-compiler-flags" - OUTPUT_VARIABLE COMPUTECPP_DEVICE_COMPILER_FLAGS - RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) -if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") - message(FATAL_ERROR "compute++ flags - Error obtaining compute++ flags!") -else() - mark_as_advanced(COMPUTECPP_COMPILER_FLAGS) - message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}") -endif() - -# Check if the platform is supported -execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported" - OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED - RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) -if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") - message(FATAL_ERROR "platform - Error checking platform support!") -else() - mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED) - if (COMPUTECPP_PLATFORM_IS_SUPPORTED) - message(STATUS "platform - your system can support ComputeCpp") - else() - message(STATUS "platform - your system CANNOT support ComputeCpp") - endif() -endif() - -#################### -# __build_sycl -#################### -# -# Adds a custom target for running compute++ and adding a dependency for the -# resulting integration header. -# -# targetName : Name of the target. -# sourceFile : Source file to be compiled. 
-# binaryDir : Intermediate directory to output the integration header. -# -function(__build_spir targetName sourceFile binaryDir) - - # Retrieve source file name. - get_filename_component(sourceFileName ${sourceFile} NAME) - - # Set the path to the Sycl file. - set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl) - - # Add any user-defined include to the device compiler - get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY - INCLUDE_DIRECTORIES) - set(device_compiler_includes "") - foreach(directory ${includeDirectories}) - set(device_compiler_includes "-I${directory}" ${device_compiler_includes}) - endforeach() - if (CMAKE_INCLUDE_PATH) - foreach(directory ${CMAKE_INCLUDE_PATH}) - set(device_compiler_includes "-I${directory}" - ${device_compiler_includes}) - endforeach() - endif() - - # Convert argument list format - separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS) - - # Add custom command for running compute++ - add_custom_command( - OUTPUT ${outputSyclFile} - COMMAND ${COMPUTECPP_DEVICE_COMPILER} - ${COMPUTECPP_DEVICE_COMPILER_FLAGS} - -isystem ${COMPUTECPP_INCLUDE_DIRECTORY} - ${COMPUTECPP_PLATFORM_SPECIFIC_ARGS} - ${device_compiler_includes} - -o ${outputSyclFile} - -c ${sourceFile} - DEPENDS ${sourceFile} - WORKING_DIRECTORY ${binaryDir} - COMMENT "Building ComputeCpp integration header file ${outputSyclFile}") - - # Add a custom target for the generated integration header - add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile}) - - # Add a dependency on the integration header - add_dependencies(${targetName} ${targetName}_integration_header) - - # Force inclusion of the integration header for the host compiler - set(compileFlags -include ${outputSyclFile} "-Wall") - target_compile_options(${targetName} PUBLIC ${compileFlags}) - - # Set the host compiler C++ standard to C++11 - set_property(TARGET ${targetName} PROPERTY CXX_STANDARD 11) - - # Disable GCC dual ABI on GCC 5.1 and higher - if(COMPUTECPP_DISABLE_GCC_DUAL_ABI) - set_property(TARGET ${targetName} APPEND PROPERTY COMPILE_DEFINITIONS - "_GLIBCXX_USE_CXX11_ABI=0") - endif() - -endfunction() - -####################### -# add_sycl_to_target -####################### -# -# Adds a SYCL compilation custom command associated with an existing -# target and sets a dependancy on that new command. -# -# targetName : Name of the target to add a SYCL to. -# sourceFile : Source file to be compiled for SYCL. -# binaryDir : Intermediate directory to output the integration header. -# -function(add_sycl_to_target targetName sourceFile binaryDir) - - # Add custom target to run compute++ and generate the integration header - __build_spir(${targetName} ${sourceFile} ${binaryDir}) - - # Link with the ComputeCpp runtime library - target_link_libraries(${targetName} PUBLIC ${COMPUTECPP_RUNTIME_LIBRARY} - PUBLIC ${OpenCL_LIBRARIES}) - -endfunction(add_sycl_to_target) - diff --git a/main.cpp b/main.cpp index c1ca69f..33cef1e 100644 --- a/main.cpp +++ b/main.cpp @@ -15,7 +15,7 @@ #include #include -#define VERSION_STRING "devel" +#define VERSION_STRING "3.2" #include "Stream.h" @@ -54,7 +54,7 @@ void parseArguments(int argc, char *argv[]); int main(int argc, char *argv[]) { std::cout - << "GPU-STREAM" << std::endl + << "BabelStream" << std::endl << "Version: " << VERSION_STRING << std::endl << "Implementation: " << IMPLEMENTATION_STRING << std::endl;
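With FindComputeCpp.cmake removed, SYCL builds now go through SYCL.make above, and the main.cpp hunk shown here renames the banner to BabelStream and pins the version string at 3.2. A quick smoke test of a freshly built binary might look like the following sketch — the binary name and flags come from CUDA.make and README.android, while the array size and the `Implementation:` line are assumptions for a CUDA build:

```sh
# Run the CUDA build in single precision with an explicit array size
./cuda-stream --float -s 10000000

# Expected start of the output, per the banner code in main.cpp:
#   BabelStream
#   Version: 3.2
#   Implementation: CUDA
```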