// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, // University of Bristol HPC // // For full license terms please see the LICENSE file distributed with this // source code #include "HIPStream.h" #include "hip/hip_runtime.h" #define TBSIZE 1024 void check_error(void) { hipError_t err = hipGetLastError(); if (err != hipSuccess) { std::cerr << "Error: " << hipGetErrorString(err) << std::endl; exit(err); } } template HIPStream::HIPStream(const unsigned int ARRAY_SIZE, const int device_index) { // The array size must be divisible by TBSIZE for kernel launches if (ARRAY_SIZE % TBSIZE != 0) { std::stringstream ss; ss << "Array size must be a multiple of " << TBSIZE; throw std::runtime_error(ss.str()); } // Set device int count; hipGetDeviceCount(&count); check_error(); if (device_index >= count) throw std::runtime_error("Invalid device index"); hipSetDevice(device_index); check_error(); // Print out device information std::cout << "Using HIP device " << getDeviceName(device_index) << std::endl; std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl; array_size = ARRAY_SIZE; // Check buffers fit on the device hipDeviceProp_t props; hipGetDeviceProperties(&props, 0); if (props.totalGlobalMem < 3*ARRAY_SIZE*sizeof(T)) throw std::runtime_error("Device does not have enough memory for all 3 buffers"); // Create device buffers hipMalloc(&d_a, ARRAY_SIZE*sizeof(T)); check_error(); hipMalloc(&d_b, ARRAY_SIZE*sizeof(T)); check_error(); hipMalloc(&d_c, ARRAY_SIZE*sizeof(T)); check_error(); } template HIPStream::~HIPStream() { hipFree(d_a); check_error(); hipFree(d_b); check_error(); hipFree(d_c); check_error(); } template __global__ void init_kernel(hipLaunchParm lp, T * a, T * b, T * c, T initA, T initB, T initC) { const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; a[i] = initA; b[i] = initB; c[i] = initC; } template void HIPStream::init_arrays(T initA, T initB, T initC) { hipLaunchKernel(HIP_KERNEL_NAME(init_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c, initA, initB, initC); check_error(); hipDeviceSynchronize(); check_error(); } template void HIPStream::read_arrays(std::vector& a, std::vector& b, std::vector& c) { // Copy device memory to host hipMemcpy(a.data(), d_a, a.size()*sizeof(T), hipMemcpyDeviceToHost); check_error(); hipMemcpy(b.data(), d_b, b.size()*sizeof(T), hipMemcpyDeviceToHost); check_error(); hipMemcpy(c.data(), d_c, c.size()*sizeof(T), hipMemcpyDeviceToHost); check_error(); } template __global__ void copy_kernel(hipLaunchParm lp, const T * a, T * c) { const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; c[i] = a[i]; } template void HIPStream::copy() { hipLaunchKernel(HIP_KERNEL_NAME(copy_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_c); check_error(); hipDeviceSynchronize(); check_error(); } template __global__ void mul_kernel(hipLaunchParm lp, T * b, const T * c) { const T scalar = startScalar; const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; b[i] = scalar * c[i]; } template void HIPStream::mul() { hipLaunchKernel(HIP_KERNEL_NAME(mul_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_b, d_c); check_error(); hipDeviceSynchronize(); check_error(); } template __global__ void add_kernel(hipLaunchParm lp, const T * a, const T * b, T * c) { const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; c[i] = a[i] + b[i]; } template void HIPStream::add() { hipLaunchKernel(HIP_KERNEL_NAME(add_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c); check_error(); hipDeviceSynchronize(); check_error(); } template __global__ void triad_kernel(hipLaunchParm lp, T * a, const T * b, const T * c) { const T scalar = startScalar; const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; a[i] = b[i] + scalar * c[i]; } template void HIPStream::triad() { hipLaunchKernel(HIP_KERNEL_NAME(triad_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c); check_error(); hipDeviceSynchronize(); check_error(); } void listDevices(void) { // Get number of devices int count; hipGetDeviceCount(&count); check_error(); // Print device names if (count == 0) { std::cerr << "No devices found." << std::endl; } else { std::cout << std::endl; std::cout << "Devices:" << std::endl; for (int i = 0; i < count; i++) { std::cout << i << ": " << getDeviceName(i) << std::endl; } std::cout << std::endl; } } std::string getDeviceName(const int device) { hipDeviceProp_t props; hipGetDeviceProperties(&props, device); check_error(); return std::string(props.name); } std::string getDeviceDriver(const int device) { hipSetDevice(device); check_error(); int driver; hipDriverGetVersion(&driver); check_error(); return std::to_string(driver); } template class HIPStream; template class HIPStream;