diff --git a/HIP.make b/HIP.make index 35b0a6a..aa30359 100644 --- a/HIP.make +++ b/HIP.make @@ -1,7 +1,6 @@ -# TODO: HIP with HCC - -HIPCC = hipcc +HIP_PATH?= /opt/rocm/hip +HIPCC=$(HIP_PATH)/bin/hipcc hip-stream: main.cpp HIPStream.cpp $(HIPCC) $(CXXFLAGS) -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@ diff --git a/HIPStream.cpp b/HIPStream.cpp index 7bf724a..ede0256 100644 --- a/HIPStream.cpp +++ b/HIPStream.cpp @@ -86,7 +86,7 @@ HIPStream::~HIPStream() template -__global__ void init_kernel(hipLaunchParm lp, T * a, T * b, T * c, T initA, T initB, T initC) +__global__ void init_kernel(T * a, T * b, T * c, T initA, T initB, T initC) { const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; a[i] = initA; @@ -97,7 +97,7 @@ __global__ void init_kernel(hipLaunchParm lp, T * a, T * b, T * c, T initA, T in template void HIPStream::init_arrays(T initA, T initB, T initC) { - hipLaunchKernel(HIP_KERNEL_NAME(init_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c, initA, initB, initC); + hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c, initA, initB, initC); check_error(); hipDeviceSynchronize(); check_error(); @@ -117,7 +117,7 @@ void HIPStream::read_arrays(std::vector& a, std::vector& b, std::vector template -__global__ void copy_kernel(hipLaunchParm lp, const T * a, T * c) +__global__ void copy_kernel(const T * a, T * c) { const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; c[i] = a[i]; @@ -126,14 +126,14 @@ __global__ void copy_kernel(hipLaunchParm lp, const T * a, T * c) template void HIPStream::copy() { - hipLaunchKernel(HIP_KERNEL_NAME(copy_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_c); + hipLaunchKernelGGL(HIP_KERNEL_NAME(copy_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_c); check_error(); hipDeviceSynchronize(); check_error(); } template -__global__ void mul_kernel(hipLaunchParm lp, T * b, const T * c) +__global__ void mul_kernel(T * b, const T * c) { const T scalar = startScalar; const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; @@ -143,14 +143,14 @@ __global__ void mul_kernel(hipLaunchParm lp, T * b, const T * c) template void HIPStream::mul() { - hipLaunchKernel(HIP_KERNEL_NAME(mul_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_b, d_c); + hipLaunchKernelGGL(HIP_KERNEL_NAME(mul_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_b, d_c); check_error(); hipDeviceSynchronize(); check_error(); } template -__global__ void add_kernel(hipLaunchParm lp, const T * a, const T * b, T * c) +__global__ void add_kernel(const T * a, const T * b, T * c) { const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; c[i] = a[i] + b[i]; @@ -159,14 +159,14 @@ __global__ void add_kernel(hipLaunchParm lp, const T * a, const T * b, T * c) template void HIPStream::add() { - hipLaunchKernel(HIP_KERNEL_NAME(add_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c); + hipLaunchKernelGGL(HIP_KERNEL_NAME(add_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c); check_error(); hipDeviceSynchronize(); check_error(); } template -__global__ void triad_kernel(hipLaunchParm lp, T * a, const T * b, const T * c) +__global__ void triad_kernel(T * a, const T * b, const T * c) { const T scalar = startScalar; const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; @@ -176,14 +176,14 @@ __global__ void triad_kernel(hipLaunchParm lp, T * a, const T * b, const T * c) template void HIPStream::triad() { - hipLaunchKernel(HIP_KERNEL_NAME(triad_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c); + hipLaunchKernelGGL(HIP_KERNEL_NAME(triad_kernel), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c); check_error(); hipDeviceSynchronize(); check_error(); } template -__global__ void dot_kernel(hipLaunchParm lp, const T * a, const T * b, T * sum, unsigned int array_size) +__global__ void dot_kernel(const T * a, const T * b, T * sum, unsigned int array_size) { __shared__ T tb_sum[TBSIZE]; @@ -210,7 +210,7 @@ __global__ void dot_kernel(hipLaunchParm lp, const T * a, const T * b, T * sum, template T HIPStream::dot() { - hipLaunchKernel(HIP_KERNEL_NAME(dot_kernel), dim3(DOT_NUM_BLOCKS), dim3(TBSIZE), 0, 0, d_a, d_b, d_sum, array_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(dot_kernel), dim3(DOT_NUM_BLOCKS), dim3(TBSIZE), 0, 0, d_a, d_b, d_sum, array_size); check_error(); hipMemcpy(sums, d_sum, DOT_NUM_BLOCKS*sizeof(T), hipMemcpyDeviceToHost);