From 696ff6a8179b4fbb070070709c595f6e1a2f02a4 Mon Sep 17 00:00:00 2001 From: Thomas Gibson Date: Mon, 13 Mar 2023 10:47:37 -0500 Subject: [PATCH] Round up dot_num_blocks and remove extra check --- src/hip/HIPStream.cpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/hip/HIPStream.cpp b/src/hip/HIPStream.cpp index 7fc732d..0db8485 100644 --- a/src/hip/HIPStream.cpp +++ b/src/hip/HIPStream.cpp @@ -33,17 +33,6 @@ HIPStream::HIPStream(const int ARRAY_SIZE, const int device_index) throw std::runtime_error(ss.str()); } - // The array size must be divisible by total number of elements - // moved per block for the dot kernel - if (ARRAY_SIZE % (TBSIZE * dot_elements_per_lane) != 0) - { - std::stringstream ss; - ss << "Array size for the dot kernel must be a multiple of elements operated on per block (" - << TBSIZE * dot_elements_per_lane - << ")."; - throw std::runtime_error(ss.str()); - } - // Set device int count; hipGetDeviceCount(&count); @@ -58,7 +47,8 @@ HIPStream::HIPStream(const int ARRAY_SIZE, const int device_index) std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl; array_size = ARRAY_SIZE; - dot_num_blocks = array_size / (TBSIZE * dot_elements_per_lane); + // Ceiling division: use enough blocks to cover array_size even when it is not a multiple of (TBSIZE * dot_elements_per_lane) + dot_num_blocks = (array_size + (TBSIZE * dot_elements_per_lane - 1)) / (TBSIZE * dot_elements_per_lane); // Allocate the host array for partial sums for dot kernels using hipHostMalloc. // This creates an array on the host which is visible to the device. However, it requires