Use triple-chevron syntax for HIP kernel launches

This commit is contained in:
Thomas Gibson 2022-08-11 10:09:57 -05:00
parent bcf8708f2c
commit f98aedf64d

View File

@@ -103,7 +103,7 @@ __global__ void init_kernel(T * a, T * b, T * c, T initA, T initB, T initC)
template <class T> template <class T>
void HIPStream<T>::init_arrays(T initA, T initB, T initC) void HIPStream<T>::init_arrays(T initA, T initB, T initC)
{ {
hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel<T>), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c, initA, initB, initC); init_kernel<T><<<dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0>>>(d_a, d_b, d_c, initA, initB, initC);
check_error(); check_error();
hipDeviceSynchronize(); hipDeviceSynchronize();
check_error(); check_error();
@@ -134,10 +134,7 @@ void copy_kernel(const T * __restrict a, T * __restrict c)
template <class T> template <class T>
void HIPStream<T>::copy() void HIPStream<T>::copy()
{ {
hipLaunchKernelGGL(HIP_KERNEL_NAME(copy_kernel<elements_per_lane, T>), copy_kernel<elements_per_lane, T><<<dim3(block_count), dim3(TBSIZE), 0, 0>>>(d_a, d_c);
dim3(block_count),
dim3(TBSIZE),
0, 0, d_a, d_c);
check_error(); check_error();
hipDeviceSynchronize(); hipDeviceSynchronize();
check_error(); check_error();
@@ -157,10 +154,7 @@ void mul_kernel(T * __restrict b, const T * __restrict c)
template <class T> template <class T>
void HIPStream<T>::mul() void HIPStream<T>::mul()
{ {
hipLaunchKernelGGL(HIP_KERNEL_NAME(mul_kernel<elements_per_lane, T>), mul_kernel<elements_per_lane, T><<<dim3(block_count), dim3(TBSIZE), 0, 0>>>(d_b, d_c);
dim3(block_count),
dim3(TBSIZE),
0, 0, d_b, d_c);
check_error(); check_error();
hipDeviceSynchronize(); hipDeviceSynchronize();
check_error(); check_error();
@@ -179,10 +173,7 @@ void add_kernel(const T * __restrict a, const T * __restrict b, T * __restrict c
template <class T> template <class T>
void HIPStream<T>::add() void HIPStream<T>::add()
{ {
hipLaunchKernelGGL(HIP_KERNEL_NAME(add_kernel<elements_per_lane, T>), add_kernel<elements_per_lane, T><<<dim3(block_count), dim3(TBSIZE), 0, 0>>>(d_a, d_b, d_c);
dim3(block_count),
dim3(TBSIZE),
0, 0, d_a, d_b, d_c);
check_error(); check_error();
hipDeviceSynchronize(); hipDeviceSynchronize();
check_error(); check_error();
@@ -202,10 +193,7 @@ void triad_kernel(T * __restrict a, const T * __restrict b, const T * __restrict
template <class T> template <class T>
void HIPStream<T>::triad() void HIPStream<T>::triad()
{ {
hipLaunchKernelGGL(HIP_KERNEL_NAME(triad_kernel<elements_per_lane, T>), triad_kernel<elements_per_lane, T><<<dim3(block_count), dim3(TBSIZE), 0, 0>>>(d_a, d_b, d_c);
dim3(block_count),
dim3(TBSIZE),
0, 0, d_a, d_b, d_c);
check_error(); check_error();
hipDeviceSynchronize(); hipDeviceSynchronize();
check_error(); check_error();
@@ -224,10 +212,7 @@ __global__ void nstream_kernel(T * __restrict a, const T * __restrict b, const T
template <class T> template <class T>
void HIPStream<T>::nstream() void HIPStream<T>::nstream()
{ {
hipLaunchKernelGGL(HIP_KERNEL_NAME(nstream_kernel<elements_per_lane, T>), nstream_kernel<elements_per_lane, T><<<dim3(block_count), dim3(TBSIZE), 0, 0>>>(d_a, d_b, d_c);
dim3(block_count),
dim3(TBSIZE),
0, 0, d_a, d_b, d_c);
check_error(); check_error();
hipDeviceSynchronize(); hipDeviceSynchronize();
check_error(); check_error();
@@ -262,10 +247,7 @@ __global__ void dot_kernel(const T * __restrict a, const T * __restrict b, T * _
template <class T> template <class T>
T HIPStream<T>::dot() T HIPStream<T>::dot()
{ {
hipLaunchKernelGGL(HIP_KERNEL_NAME(dot_kernel<elements_per_lane, T>), dot_kernel<elements_per_lane, T><<<dim3(block_count), dim3(TBSIZE), 0, 0>>>(d_a, d_b, sums, array_size);
dim3(block_count),
dim3(TBSIZE),
0, 0, d_a, d_b, sums, array_size);
check_error(); check_error();
hipDeviceSynchronize(); hipDeviceSynchronize();
check_error(); check_error();