diff --git a/CMakeLists.txt b/CMakeLists.txt index 91b4f5d..f80d762 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,16 @@ cmake_minimum_required(VERSION 3.2) +if(NOT DEFINED HIP_PATH) + if(NOT DEFINED ENV{HIP_PATH}) + set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") + else() + set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") + endif() +endif() +set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) + + set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -27,6 +37,18 @@ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray") list(APPEND CMAKE_CXX_FLAGS -hstd=c++11) endif () +#------------------------------------------------------------------------------- +# HIP +#------------------------------------------------------------------------------- +find_package(HIP QUIET) +if(${HIP_FOUND}) + list(APPEND HIP_HIPCC_FLAGS --std=c++11) + hip_add_executable(gpu-stream-hip main.cpp HIPStream.cu) + target_compile_definitions(gpu-stream-hip PUBLIC HIP) +else() + message("Skipping HIP...") +endif() + #------------------------------------------------------------------------------- # CUDA #------------------------------------------------------------------------------- diff --git a/HIPStream.cu b/HIPStream.cu new file mode 100644 index 0000000..6ad6556 --- /dev/null +++ b/HIPStream.cu @@ -0,0 +1,215 @@ +#include "hip/hip_runtime.h" + +// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, +// University of Bristol HPC +// +// For full license terms please see the LICENSE file distributed with this +// source code + + +#include "HIPStream.h" + +#define TBSIZE 1024 + +void check_error(void) +{ + hipError_t err = hipGetLastError(); + if (err != hipSuccess) + { + std::cerr << "Error: " << hipGetErrorString(err) << std::endl; + exit(err); + } +} + +template +HIPStream::HIPStream(const unsigned int ARRAY_SIZE, const int device_index) +{ + + // The array size must be divisible by TBSIZE for kernel launches + if (ARRAY_SIZE % TBSIZE != 0) + { + std::stringstream ss; + ss << "Array size must be a multiple of " << TBSIZE; + throw std::runtime_error(ss.str()); + } + + // Set device + int count; + hipGetDeviceCount(&count); + check_error(); + if (device_index >= count) + throw std::runtime_error("Invalid device index"); + hipSetDevice(device_index); + check_error(); + + // Print out device information + std::cout << "Using HIP device " << getDeviceName(device_index) << std::endl; + std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl; + + array_size = ARRAY_SIZE; + + // Check buffers fit on the device + hipDeviceProp_t props; + hipGetDeviceProperties(&props, 0); + if (props.totalGlobalMem < 3*ARRAY_SIZE*sizeof(T)) + throw std::runtime_error("Device does not have enough memory for all 3 buffers"); + + // Create device buffers + hipMalloc(&d_a, ARRAY_SIZE*sizeof(T)); + check_error(); + hipMalloc(&d_b, ARRAY_SIZE*sizeof(T)); + check_error(); + hipMalloc(&d_c, ARRAY_SIZE*sizeof(T)); + check_error(); +} + + +template +HIPStream::~HIPStream() +{ + hipFree(d_a); + check_error(); + hipFree(d_b); + check_error(); + hipFree(d_c); + check_error(); +} + +template +void HIPStream::write_arrays(const std::vector& a, const std::vector& b, const std::vector& c) +{ + // Copy host memory to device + hipMemcpy(d_a, a.data(), a.size()*sizeof(T), hipMemcpyHostToDevice); + check_error(); + hipMemcpy(d_b, b.data(), b.size()*sizeof(T), hipMemcpyHostToDevice); + check_error(); + hipMemcpy(d_c, c.data(), c.size()*sizeof(T), hipMemcpyHostToDevice); + check_error(); +} + +template +void HIPStream::read_arrays(std::vector& a, std::vector& b, std::vector& c) +{ + // Copy device memory to host + hipMemcpy(a.data(), d_a, a.size()*sizeof(T), hipMemcpyDeviceToHost); + check_error(); + hipMemcpy(b.data(), d_b, b.size()*sizeof(T), hipMemcpyDeviceToHost); + check_error(); + hipMemcpy(c.data(), d_c, c.size()*sizeof(T), hipMemcpyDeviceToHost); + check_error(); +} + + +template +__global__ void copy_kernel(hipLaunchParm lp, const T * a, T * c) +{ + const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + c[i] = a[i]; +} + +template +void HIPStream::copy() +{ + hipLaunchKernel(HIP_KERNEL_NAME(copy_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_c); + check_error(); + hipDeviceSynchronize(); + check_error(); +} + +template +__global__ void mul_kernel(hipLaunchParm lp, T * b, const T * c) +{ + const T scalar = 0.3; + const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + b[i] = scalar * c[i]; +} + +template +void HIPStream::mul() +{ + hipLaunchKernel(HIP_KERNEL_NAME(mul_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_b, d_c); + check_error(); + hipDeviceSynchronize(); + check_error(); +} + +template +__global__ void add_kernel(hipLaunchParm lp, const T * a, const T * b, T * c) +{ + const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + c[i] = a[i] + b[i]; +} + +template +void HIPStream::add() +{ + hipLaunchKernel(HIP_KERNEL_NAME(add_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c); + check_error(); + hipDeviceSynchronize(); + check_error(); +} + +template +__global__ void triad_kernel(hipLaunchParm lp, T * a, const T * b, const T * c) +{ + const T scalar = 0.3; + const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + a[i] = b[i] + scalar * c[i]; +} + +template +void HIPStream::triad() +{ + hipLaunchKernel(HIP_KERNEL_NAME(triad_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c); + check_error(); + hipDeviceSynchronize(); + check_error(); +} + + +void listDevices(void) +{ + // Get number of devices + int count; + hipGetDeviceCount(&count); + check_error(); + + // Print device names + if (count == 0) + { + std::cerr << "No devices found." << std::endl; + } + else + { + std::cout << std::endl; + std::cout << "Devices:" << std::endl; + for (int i = 0; i < count; i++) + { + std::cout << i << ": " << getDeviceName(i) << std::endl; + } + std::cout << std::endl; + } +} + + +std::string getDeviceName(const int device) +{ + hipDeviceProp_t props; + hipGetDeviceProperties(&props, device); + check_error(); + return std::string(props.name); +} + + +std::string getDeviceDriver(const int device) +{ + hipSetDevice(device); + check_error(); + int driver; + hipDriverGetVersion(&driver); + check_error(); + return std::to_string(driver); +} + +template class HIPStream; +template class HIPStream; diff --git a/HIPStream.h b/HIPStream.h new file mode 100644 index 0000000..9015e35 --- /dev/null +++ b/HIPStream.h @@ -0,0 +1,43 @@ + +// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, +// University of Bristol HPC +// +// For full license terms please see the LICENSE file distributed with this +// source code + +#pragma once + +#include +#include +#include + +#include "Stream.h" + +#define IMPLEMENTATION_STRING "HIP" + +template +class HIPStream : public Stream +{ + protected: + // Size of arrays + unsigned int array_size; + // Device side pointers to arrays + T *d_a; + T *d_b; + T *d_c; + + + public: + + HIPStream(const unsigned int, const int); + ~HIPStream(); + + virtual void copy() override; + virtual void add() override; + virtual void mul() override; + virtual void triad() override; + + virtual void write_arrays(const std::vector& a, const std::vector& b, const std::vector& c) override; + virtual void read_arrays(std::vector& a, std::vector& b, std::vector& c) override; + +}; diff --git a/main.cpp b/main.cpp index 5379bb9..87ff9b2 100644 --- a/main.cpp +++ b/main.cpp @@ -20,6 +20,8 @@ #if defined(CUDA) #include "CUDAStream.h" +#elif defined(HIP) +#include "HIPStream.h" #elif defined(OCL) #include "OCLStream.h" #elif defined(USE_RAJA) @@ -100,6 +102,10 @@ void run() // Use the CUDA implementation stream = new CUDAStream(ARRAY_SIZE, deviceIndex); +#elif defined(HIP) + // Use the HIP implementation + stream = new HIPStream(ARRAY_SIZE, deviceIndex); + #elif defined(OCL) // Use the OpenCL implementation stream = new OCLStream(ARRAY_SIZE, deviceIndex);