From de93c06e78a7051cfed4a44626ac6fc599f5c24d Mon Sep 17 00:00:00 2001 From: Thomas Gibson <14180421+thomasgibson@users.noreply.github.com> Date: Thu, 11 Aug 2022 10:32:20 -0500 Subject: [PATCH] Add clarifying comment and further clean-up --- src/hip/HIPStream.cpp | 8 ++++---- src/hip/HIPStream.h | 1 - src/hip/model.cmake | 1 - 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/hip/HIPStream.cpp b/src/hip/HIPStream.cpp index ce69172..37fce3b 100644 --- a/src/hip/HIPStream.cpp +++ b/src/hip/HIPStream.cpp @@ -27,8 +27,6 @@ HIPStream::HIPStream(const int ARRAY_SIZE, const int device_index) block_count(array_size / (TBSIZE * elements_per_lane)) { - std::cerr << "Elements per lane: " << elements_per_lane << std::endl; - std::cerr << "Chunks per block: " << chunks_per_block << std::endl; // The array size must be divisible by total number of elements // moved per block for kernel launches if (ARRAY_SIZE % (TBSIZE * elements_per_lane) != 0) @@ -39,7 +37,6 @@ HIPStream::HIPStream(const int ARRAY_SIZE, const int device_index) << ")."; throw std::runtime_error(ss.str()); } - std::cerr << "block count " << block_count << std::endl; // Set device int count; @@ -56,7 +53,10 @@ HIPStream::HIPStream(const int ARRAY_SIZE, const int device_index) array_size = ARRAY_SIZE; - // Allocate the host array for partial sums for dot kernels + // Allocate the host array for partial sums for dot kernels using hipHostMalloc. + // This creates an array on the host which is visible to the device. However, it requires + // synchronization (e.g. hipDeviceSynchronize) for the result to be available on the host + // after it has been passed through to a kernel. hipHostMalloc(&sums, sizeof(T) * block_count, hipHostMallocNonCoherent); check_error(); diff --git a/src/hip/HIPStream.h b/src/hip/HIPStream.h index 7bce0b5..305e937 100644 --- a/src/hip/HIPStream.h +++ b/src/hip/HIPStream.h @@ -55,7 +55,6 @@ class HIPStream : public Stream T *d_a; T *d_b; T *d_c; - T *d_sum; public: diff --git a/src/hip/model.cmake b/src/hip/model.cmake index 2f7d69e..19e6fd0 100644 --- a/src/hip/model.cmake +++ b/src/hip/model.cmake @@ -5,6 +5,5 @@ register_flag_required(CMAKE_CXX_COMPILER register_flag_optional(DWORDS_PER_LANE "Flag indicating the number of dwords to process per wavefront lane." 4) macro(setup) - # Ensure we set the proper preprocessor directives register_definitions(DWORDS_PER_LANE=${DWORDS_PER_LANE}) endmacro() \ No newline at end of file